diff -Nru xen-4.9.0/Config.mk xen-4.9.2/Config.mk
--- xen-4.9.0/Config.mk 2017-06-27 18:13:19.000000000 +0000
+++ xen-4.9.2/Config.mk 2018-03-28 13:10:55.000000000 +0000
@@ -268,8 +268,8 @@
 MINIOS_UPSTREAM_URL ?= git://xenbits.xen.org/mini-os.git
 endif
 OVMF_UPSTREAM_REVISION ?= 5920a9d16b1ab887c2858224316a98e961d71b05
-QEMU_UPSTREAM_REVISION ?= qemu-xen-4.9.0
-MINIOS_UPSTREAM_REVISION ?= xen-RELEASE-4.9.0
+QEMU_UPSTREAM_REVISION ?= qemu-xen-4.9.2
+MINIOS_UPSTREAM_REVISION ?= xen-RELEASE-4.9.2
 
 # Wed Feb 22 11:03:37 2017 +0000
 # Include libxendevicemodel with libxc
@@ -280,7 +280,7 @@
 ETHERBOOT_NICS ?= rtl8139 8086100e
 
-QEMU_TRADITIONAL_REVISION ?= xen-4.9.0
+QEMU_TRADITIONAL_REVISION ?= xen-4.9.2
 # Thu Mar 9 11:14:55 2017 +0000
 # cirrus/vnc: zap drop bitblit support from console code.
diff -Nru xen-4.9.0/debian/changelog xen-4.9.2/debian/changelog
--- xen-4.9.0/debian/changelog 2018-04-06 16:35:43.000000000 +0000
+++ xen-4.9.2/debian/changelog 2018-04-12 09:54:57.000000000 +0000
@@ -1,3 +1,43 @@
+xen (4.9.2-0ubuntu1) bionic; urgency=medium
+
+  * Update to upstream 4.9.2 release (LP: #1763354).
+    Changes include numerous bugfixes, including security fixes/updates.
+    4.9.0 -> 4.9.1:
+    - XSA-226 / CVE-2017-12135 (replacement)
+    - XSA-227 / CVE-2017-12137 (replacement)
+    - XSA-228 / CVE-2017-12136 (replacement)
+    - XSA-230 / CVE-2017-12855 (replacement)
+    - XSA-231 / CVE-2017-14316 (replacement)
+    - XSA-232 / CVE-2017-14318 (replacement)
+    - XSA-233 / CVE-2017-14317 (replacement)
+    - XSA-234 / CVE-2017-14319 (replacement)
+    - XSA-235 / CVE-2017-15596 (replacement)
+    - XSA-236 / CVE-2017-15597 (new)
+    - XSA-237 / CVE-2017-15590 (replacement)
+    - XSA-238 / CVE-2017-15591 (replacement)
+    - XSA-239 / CVE-2017-15589 (replacement)
+    - XSA-240 / CVE-2017-15595 (update)
+    - XSA-241 / CVE-2017-15588 (replacement)
+    - XSA-242 / CVE-2017-15593 (replacement)
+    - XSA-243 / CVE-2017-15592 (replacement)
+    - XSA-244 / CVE-2017-15594 (replacement)
+    - XSA-245 / CVE-2017-17046 (replacement)
+    4.9.1 -> 4.9.2:
+    - XSA-246 / CVE-2017-17044 (new)
+    - XSA-247 / CVE-2017-17045 (new)
+    - XSA-248 / CVE-2017-17566 (new)
+    - XSA-249 / CVE-2017-17563 (new)
+    - XSA-250 / CVE-2017-17564 (new)
+    - XSA-251 / CVE-2017-17565 (new)
+    - XSA-252 / CVE-2018-7540 (new)
+    - XSA-254 / CVE-2017-5754 (new / XPTI Meltdown mitigation)
+    - XSA-255 / CVE-2018-7541 (new)
+    - XSA-256 / CVE-2018-7542 (new)
+  * Dropped:
+    d/p/ubuntu/tools-fix-ftbs-arm.patch (upstream)
+
+ -- Stefan Bader  Thu, 12 Apr 2018 11:54:57 +0200
+
 xen (4.9.0-0ubuntu4) bionic; urgency=medium
 
   * Compile and ship vhd-util.
diff -Nru xen-4.9.0/debian/patches/series xen-4.9.2/debian/patches/series
--- xen-4.9.0/debian/patches/series 2018-04-06 16:35:43.000000000 +0000
+++ xen-4.9.2/debian/patches/series 2018-04-12 09:54:57.000000000 +0000
@@ -29,7 +29,6 @@
 
 # Not in Debian (yet)
 ubuntu/tools-xs-test-hardening.patch
-ubuntu/tools-fix-ftbs-arm.patch
 ubuntu/tools-fake-xs-restrict.patch
 ubuntu/static-link-vhd-util.patch
 
@@ -47,32 +46,4 @@
 ubuntu/local-mce/0011-x86-vmce-tools-libxl-expose-LMCE-capability-in-guest.patch
 ubuntu/local-mce/0012-x86-mce-add-support-of-vLMCE-injection-to-XEN_MC_inj.patch
 
-# From 4.9.1 stable branch
-upstream-4.9.1-tools-libxl-Fix-a-segment-fault-when-mmio_hole-is-se.patch
-
 # Security updates
-xsa226-4.9-0001.patch
-xsa226-4.9-0002.patch
-xsa-227-4.9.y.patch
-xsa-228-4.9.y.patch
-xsa-230-4.9.y.patch
-xsa231-4.9.patch
-xsa232.patch
-xsa233.patch
-xsa234-4.9.patch
-xsa235-4.9.patch
-xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch
-xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch
-xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch
-xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch
-xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch
-xsa238.patch
-xsa239.patch
-xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
-xsa240-4.9-0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch
-xsa241-4.9.patch
-xsa242-4.9.patch
-xsa243.patch
-xsa244.patch
-xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch
-xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch
diff -Nru xen-4.9.0/debian/patches/ubuntu/tools-fix-ftbs-arm.patch xen-4.9.2/debian/patches/ubuntu/tools-fix-ftbs-arm.patch
--- xen-4.9.0/debian/patches/ubuntu/tools-fix-ftbs-arm.patch 2017-08-16 09:07:46.000000000 +0000
+++ xen-4.9.2/debian/patches/ubuntu/tools-fix-ftbs-arm.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
-Description: Fix build failures on arm platforms
-Author: Stefan Bader
-Index: xen-4.9.0/tools/libxc/xc_dom_arm.c
-===================================================================
---- xen-4.9.0.orig/tools/libxc/xc_dom_arm.c
-+++ xen-4.9.0/tools/libxc/xc_dom_arm.c
-@@ -223,6 +223,7 @@ static int set_mode(xc_interface *xch, d
- 
-     domctl.domain = domid;
-     domctl.cmd = XEN_DOMCTL_set_address_size;
-+    domctl.u.address_size.size = 0;
-     for ( i = 0; i < ARRAY_SIZE(types); i++ )
-         if ( !strcmp(types[i].guest, guest_type) )
-             domctl.u.address_size.size = types[i].size;
diff -Nru xen-4.9.0/debian/patches/xsa226-4.9-0001.patch xen-4.9.2/debian/patches/xsa226-4.9-0001.patch
--- xen-4.9.0/debian/patches/xsa226-4.9-0001.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa226-4.9-0001.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,149 +0,0 @@
-From: Jan Beulich
-Subject: gnttab: don't use possibly unbounded tail calls
-
-There is no guarantee that the compiler would actually translate them
-to branches instead of calls, so only ones with a known recursion limit
-are okay:
-- __release_grant_for_copy() can call itself only once, as
-  __acquire_grant_for_copy() won't permit use of multi-level transitive
-  grants,
-- __acquire_grant_for_copy() is fine to call itself with the last
-  argument false, as that prevents further recursion,
-- __acquire_grant_for_copy() must not call itself to recover from an
-  observed change to the active entry's pin count
-
-This is part of CVE-2017-12135 / XSA-226.
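
Note: the XSA-226 description above hinges on replacing retry-by-self-call with a "tell the caller to retry" contract, so that stack depth stays bounded no matter how often the race fires. The following stand-alone C sketch shows that pattern in isolation; every name in it (acquire_once(), the ERESTART value, state_changed()) is an illustrative assumption, not the actual Xen code:

    #include <stdio.h>

    #define ERESTART 1   /* assumed positive retry indicator, mirroring the patch */

    /* Stand-in for racy state observed while a lock was dropped. */
    static int state_changed(void) { static int n; return n++ < 3; }

    static int acquire_once(void)
    {
        if ( state_changed() )
            return ERESTART;   /* ask the caller to retry; no recursion here */
        return 0;              /* success */
    }

    int main(void)
    {
        int rc;

        /* The retry loop lives in the caller, so every attempt reuses the
         * same stack frame instead of pushing a new one. */
        while ( (rc = acquire_once()) == ERESTART )
            ;
        printf("acquired: rc=%d\n", rc);
        return 0;
    }

A misbehaving peer can still force repeated retries, but it can no longer grow the stack, which is exactly what the patch's removal of the recursive __acquire_grant_for_copy() call achieves.
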
-
-Signed-off-by: Jan Beulich
-
---- a/xen/common/compat/grant_table.c
-+++ b/xen/common/compat/grant_table.c
-@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int cmd,
-             rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n);
-             if ( rc > 0 )
-             {
--                ASSERT(rc < n);
--                i -= n - rc;
--                n = rc;
-+                ASSERT(rc <= n);
-+                i -= rc;
-+                n -= rc;
-             }
-             if ( rc >= 0 )
-             {
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -2103,8 +2103,10 @@ __release_grant_for_copy(
- 
-     if ( td != rd )
-     {
--        /* Recursive calls, but they're tail calls, so it's
--           okay. */
-+        /*
-+         * Recursive calls, but they're bounded (acquire permits only a single
-+         * level of transitivity), so it's okay.
-+         */
-         if ( released_write )
-             __release_grant_for_copy(td, trans_gref, 0);
-         else if ( released_read )
-@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy(
-             return rc;
-         }
- 
--        /* We dropped the lock, so we have to check that nobody
--           else tried to pin (or, for that matter, unpin) the
--           reference in *this* domain.  If they did, just give up
--           and try again. */
-+        /*
-+         * We dropped the lock, so we have to check that nobody else tried
-+         * to pin (or, for that matter, unpin) the reference in *this*
-+         * domain.  If they did, just give up and tell the caller to retry.
-+         */
-         if ( act->pin != old_pin )
-         {
-             __fixup_status_for_copy_pin(act, status);
-@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy(
-             active_entry_release(act);
-             grant_read_unlock(rgt);
-             put_page(*page);
--            return __acquire_grant_for_copy(rd, gref, ldom, readonly,
--                                            frame, page, page_off, length,
--                                            allow_transitive);
-+            *page = NULL;
-+            return ERESTART;
-         }
- 
-         /* The actual remote remote grant may or may not be a
-@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct
-     {
-         gnttab_copy_release_buf(src);
-         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
--        if ( rc < 0 )
-+        if ( rc )
-             goto out;
-     }
- 
-@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct
-     {
-         gnttab_copy_release_buf(dest);
-         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
--        if ( rc < 0 )
-+        if ( rc )
-             goto out;
-     }
- 
-@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct
-     return rc;
- }
- 
-+/*
-+ * gnttab_copy(), other than the various other helpers of
-+ * do_grant_table_op(), returns (besides possible error indicators)
-+ * "count - i" rather than "i" to ensure that even if no progress
-+ * was made at all (perhaps due to gnttab_copy_one() returning a
-+ * positive value) a non-zero value is being handed back (zero needs
-+ * to be avoided, as that means "success, all done").
-+ */
- static long gnttab_copy(
-     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
- {
-@@ -2606,7 +2616,7 @@ static long gnttab_copy(
-     {
-         if ( i && hypercall_preempt_check() )
-         {
--            rc = i;
-+            rc = count - i;
-             break;
-         }
- 
-@@ -2616,13 +2626,20 @@
-             break;
-         }
- 
--        op.status = gnttab_copy_one(&op, &dest, &src);
--        if ( op.status != GNTST_okay )
-+        rc = gnttab_copy_one(&op, &dest, &src);
-+        if ( rc > 0 )
-+        {
-+            rc = count - i;
-+            break;
-+        }
-+        if ( rc != GNTST_okay )
-         {
-             gnttab_copy_release_buf(&src);
-             gnttab_copy_release_buf(&dest);
-         }
- 
-+        op.status = rc;
-+        rc = 0;
-         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
-         {
-             rc = -EFAULT;
-@@ -3160,6 +3177,7 @@ do_grant_table_op(
-         rc = gnttab_copy(copy, count);
-         if ( rc > 0 )
-         {
-+            rc = count - rc;
-             guest_handle_add_offset(copy, rc);
-             uop = guest_handle_cast(copy, void);
-         }
diff -Nru xen-4.9.0/debian/patches/xsa226-4.9-0002.patch xen-4.9.2/debian/patches/xsa226-4.9-0002.patch
--- xen-4.9.0/debian/patches/xsa226-4.9-0002.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa226-4.9-0002.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,280 +0,0 @@
-From: Jan Beulich
-Subject: gnttab: fix transitive grant handling
-
-Processing of transitive grants must not use the fast path, or else
-reference counting breaks due to the skipped recursive call to
-__acquire_grant_for_copy() (its __release_grant_for_copy()
-counterpart occurs independent of original pin count).  Furthermore
-after re-acquiring temporarily dropped locks we need to verify no grant
-properties changed if the original pin count was non-zero; checking
-just the pin counts is sufficient only for well-behaved guests.  As a
-result, __release_grant_for_copy() needs to mirror that new behavior.
-
-Furthermore a __release_grant_for_copy() invocation was missing on the
-retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
-needs to bail out upon encountering a transitive grant.
-
-This is part of CVE-2017-12135 / XSA-226.
-
-Reported-by: Andrew Cooper
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -2050,13 +2050,8 @@ __release_grant_for_copy(
-     unsigned long r_frame;
-     uint16_t *status;
-     grant_ref_t trans_gref;
--    int released_read;
--    int released_write;
-     struct domain *td;
- 
--    released_read = 0;
--    released_write = 0;
--
-     grant_read_lock(rgt);
- 
-     act = active_entry_acquire(rgt, gref);
-@@ -2086,17 +2081,11 @@ __release_grant_for_copy(
- 
-         act->pin -= GNTPIN_hstw_inc;
-         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
--        {
--            released_write = 1;
-             gnttab_clear_flag(_GTF_writing, status);
--        }
-     }
- 
-     if ( !act->pin )
--    {
-         gnttab_clear_flag(_GTF_reading, status);
--        released_read = 1;
--    }
- 
-     active_entry_release(act);
-     grant_read_unlock(rgt);
-@@ -2104,13 +2093,10 @@ __release_grant_for_copy(
-     if ( td != rd )
-     {
-         /*
--         * Recursive calls, but they're bounded (acquire permits only a single
-+         * Recursive call, but it is bounded (acquire permits only a single
-          * level of transitivity), so it's okay.
-          */
--        if ( released_write )
--            __release_grant_for_copy(td, trans_gref, 0);
--        else if ( released_read )
--            __release_grant_for_copy(td, trans_gref, 1);
-+        __release_grant_for_copy(td, trans_gref, readonly);
- 
-         rcu_unlock_domain(td);
-     }
-@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy(
-                         act->domid, ldom, act->pin);
- 
-     old_pin = act->pin;
--    if ( !act->pin ||
--         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
-+    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
-+    {
-+        if ( (!old_pin || (!readonly &&
-+                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
-+             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
-+                                  status)) != GNTST_okay )
-+            goto unlock_out;
-+
-+        if ( !allow_transitive )
-+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
-+                     "transitive grant when transitivity not allowed\n");
-+
-+        trans_domid = sha2->transitive.trans_domid;
-+        trans_gref = sha2->transitive.gref;
-+        barrier(); /* Stop the compiler from re-loading
-+                      trans_domid from shared memory */
-+        if ( trans_domid == rd->domain_id )
-+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
-+                     "transitive grants cannot be self-referential\n");
-+
-+        /*
-+         * We allow the trans_domid == ldom case, which corresponds to a
-+         * grant being issued by one domain, sent to another one, and then
-+         * transitively granted back to the original domain.  Allowing it
-+         * is easy, and means that you don't need to go out of your way to
-+         * avoid it in the guest.
-+         */
-+
-+        /* We need to leave the rrd locked during the grant copy. */
-+        td = rcu_lock_domain_by_id(trans_domid);
-+        if ( td == NULL )
-+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
-+                     "transitive grant referenced bad domain %d\n",
-+                     trans_domid);
-+
-+        /*
-+         * __acquire_grant_for_copy() could take the lock on the
-+         * remote table (if rd == td), so we have to drop the lock
-+         * here and reacquire.
-+         */
-+        active_entry_release(act);
-+        grant_read_unlock(rgt);
-+
-+        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
-+                                      readonly, &grant_frame, page,
-+                                      &trans_page_off, &trans_length, 0);
-+
-+        grant_read_lock(rgt);
-+        act = active_entry_acquire(rgt, gref);
-+
-+        if ( rc != GNTST_okay )
-+        {
-+            __fixup_status_for_copy_pin(act, status);
-+            rcu_unlock_domain(td);
-+            active_entry_release(act);
-+            grant_read_unlock(rgt);
-+            return rc;
-+        }
-+
-+        /*
-+         * We dropped the lock, so we have to check that the grant didn't
-+         * change, and that nobody else tried to pin/unpin it. If anything
-+         * changed, just give up and tell the caller to retry.
-+         */
-+        if ( rgt->gt_version != 2 ||
-+             act->pin != old_pin ||
-+             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
-+                          act->start != trans_page_off ||
-+                          act->length != trans_length ||
-+                          act->trans_domain != td ||
-+                          act->trans_gref != trans_gref ||
-+                          !act->is_sub_page)) )
-+        {
-+            __release_grant_for_copy(td, trans_gref, readonly);
-+            __fixup_status_for_copy_pin(act, status);
-+            rcu_unlock_domain(td);
-+            active_entry_release(act);
-+            grant_read_unlock(rgt);
-+            put_page(*page);
-+            *page = NULL;
-+            return ERESTART;
-+        }
-+
-+        if ( !old_pin )
-+        {
-+            act->domid = ldom;
-+            act->start = trans_page_off;
-+            act->length = trans_length;
-+            act->trans_domain = td;
-+            act->trans_gref = trans_gref;
-+            act->frame = grant_frame;
-+            act->gfn = -1ul;
-+            /*
-+             * The actual remote remote grant may or may not be a sub-page,
-+             * but we always treat it as one because that blocks mappings of
-+             * transitive grants.
-+             */
-+            act->is_sub_page = 1;
-+        }
-+    }
-+    else if ( !old_pin ||
-+              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
-     {
-         if ( (rc = _set_status(rgt->gt_version, ldom,
-                                readonly, 0, shah, act,
-@@ -2206,79 +2292,6 @@
-             trans_page_off = 0;
-             trans_length = PAGE_SIZE;
-         }
--        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
--        {
--            if ( !allow_transitive )
--                PIN_FAIL(unlock_out_clear, GNTST_general_error,
--                         "transitive grant when transitivity not allowed\n");
--
--            trans_domid = sha2->transitive.trans_domid;
--            trans_gref = sha2->transitive.gref;
--            barrier(); /* Stop the compiler from re-loading
--                          trans_domid from shared memory */
--            if ( trans_domid == rd->domain_id )
--                PIN_FAIL(unlock_out_clear, GNTST_general_error,
--                         "transitive grants cannot be self-referential\n");
--
--            /* We allow the trans_domid == ldom case, which
--               corresponds to a grant being issued by one domain, sent
--               to another one, and then transitively granted back to
--               the original domain.  Allowing it is easy, and means
--               that you don't need to go out of your way to avoid it
--               in the guest. */
--
--            /* We need to leave the rrd locked during the grant copy */
--            td = rcu_lock_domain_by_id(trans_domid);
--            if ( td == NULL )
--                PIN_FAIL(unlock_out_clear, GNTST_general_error,
--                         "transitive grant referenced bad domain %d\n",
--                         trans_domid);
--
--            /*
--             * __acquire_grant_for_copy() could take the lock on the
--             * remote table (if rd == td), so we have to drop the lock
--             * here and reacquire
--             */
--            active_entry_release(act);
--            grant_read_unlock(rgt);
--
--            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
--                                          readonly, &grant_frame, page,
--                                          &trans_page_off, &trans_length, 0);
--
--            grant_read_lock(rgt);
--            act = active_entry_acquire(rgt, gref);
--
--            if ( rc != GNTST_okay ) {
--                __fixup_status_for_copy_pin(act, status);
--                rcu_unlock_domain(td);
--                active_entry_release(act);
--                grant_read_unlock(rgt);
--                return rc;
--            }
--
--            /*
--             * We dropped the lock, so we have to check that nobody else tried
--             * to pin (or, for that matter, unpin) the reference in *this*
--             * domain. If they did, just give up and tell the caller to retry.
--             */
--            if ( act->pin != old_pin )
--            {
--                __fixup_status_for_copy_pin(act, status);
--                rcu_unlock_domain(td);
--                active_entry_release(act);
--                grant_read_unlock(rgt);
--                put_page(*page);
--                *page = NULL;
--                return ERESTART;
--            }
--
--            /* The actual remote remote grant may or may not be a
--               sub-page, but we always treat it as one because that
--               blocks mappings of transitive grants. */
--            is_sub_page = 1;
--            act->gfn = -1ul;
--        }
-         else if ( !(sha2->hdr.flags & GTF_sub_page) )
-         {
-             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
-@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
-     case 2:
-         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
-         {
--            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
--                  GTF_permit_access) &&
--                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
-+            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
-             {
-+            case GTF_permit_access:
-+                if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
-+                    break;
-+                /* fall through */
-+            case GTF_transitive:
-                 gdprintk(XENLOG_WARNING,
-                          "tried to change grant table version to 1 with non-representable entries\n");
-                 res = -ERANGE;
diff -Nru xen-4.9.0/debian/patches/xsa-227-4.9.y.patch xen-4.9.2/debian/patches/xsa-227-4.9.y.patch
--- xen-4.9.0/debian/patches/xsa-227-4.9.y.patch 2017-08-16 12:20:34.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa-227-4.9.y.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,56 +0,0 @@
-From 46981065bd15cf35b4b5cdc5d2897748162d6123 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Tue, 15 Aug 2017 15:12:41 +0200
-Subject: [PATCH] x86/grant: disallow misaligned PTEs
-
-Pagetable entries must be aligned to function correctly.  Disallow attempts
-from the guest to have a grant PTE created at a misaligned address, which
-would result in corruption of the L1 table with largely-guest-controlled
-values.
-
-This is CVE-2017-12137 / XSA-227.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-master commit: ce442926c2530da9376199dcc769436376ad2386
-master date: 2017-08-15 15:06:45 +0200
-
-Signed-off-by: Stefan Bader
----
- xen/arch/x86/mm.c | 13 +++++++++++++
- 1 file changed, 13 insertions(+)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 2dc7db9..39b5c17 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4006,6 +4006,9 @@ static int create_grant_pte_mapping(
-     l1_pgentry_t ol1e;
-     struct domain *d = v->domain;
- 
-+    if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) )
-+        return GNTST_general_error;
-+
-     adjust_guest_l1e(nl1e, d);
- 
-     gmfn = pte_addr >> PAGE_SHIFT;
-@@ -4063,6 +4066,16 @@ static int destroy_grant_pte_mapping(
-     struct page_info *page;
-     l1_pgentry_t ol1e;
- 
-+    /*
-+     * addr comes from Xen's active_entry tracking so isn't guest controlled,
-+     * but it had still better be PTE-aligned.
-+     */
-+    if ( !IS_ALIGNED(addr, sizeof(ol1e)) )
-+    {
-+        ASSERT_UNREACHABLE();
-+        return GNTST_general_error;
-+    }
-+
-     gmfn = addr >> PAGE_SHIFT;
-     page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
- 
--- 
-2.7.4
-
diff -Nru xen-4.9.0/debian/patches/xsa-228-4.9.y.patch xen-4.9.2/debian/patches/xsa-228-4.9.y.patch
--- xen-4.9.0/debian/patches/xsa-228-4.9.y.patch 2017-08-16 12:20:41.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa-228-4.9.y.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,202 +0,0 @@
-From 266fc0ea45a5b56e2d507f84978ec86256280375 Mon Sep 17 00:00:00 2001
-From: Jan Beulich
-Date: Tue, 15 Aug 2017 15:14:02 +0200
-Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose
- again
-
-The way the lock is currently being used in get_maptrack_handle(), it
-protects only the maptrack limit: The function acts on current's list
-only, so races on list accesses are impossible even without the lock.
-
-Otoh list access races are possible between __get_maptrack_handle() and
-put_maptrack_handle(), due to the invocation of the former for other
-than current from steal_maptrack_handle().  Introduce a per-vCPU lock
-for list accesses to become race free again.  This lock will be
-uncontended except when it becomes necessary to take the steal path,
-i.e. in the common case there should be no meaningful performance
-impact.
-
-When in get_maptrack_handle adds a stolen entry to a fresh, empty,
-freelist, we think that there is probably no concurrency.  However,
-this is not a fast path and adding the locking there makes the code
-clearly correct.
-
-Also, while we are here: the stolen maptrack_entry's tail pointer was
-not properly set.  Set it.
-
-This is CVE-2017-12136 / XSA-228.
-
-Reported-by: Ian Jackson
-Signed-off-by: Jan Beulich
-Signed-off-by: Ian Jackson
-master commit: 02cbeeb6207508b0f04a2c6181445c8eb3f1e117
-master date: 2017-08-15 15:07:25 +0200
-
-Signed-off-by: Stefan Bader
----
- docs/misc/grant-tables.txt    |  7 ++++++-
- xen/common/grant_table.c      | 30 ++++++++++++++++++++++++------
- xen/include/xen/grant_table.h |  2 +-
- xen/include/xen/sched.h       |  1 +
- 4 files changed, 32 insertions(+), 8 deletions(-)
-
-diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
-index 417ce2d..64da5cf 100644
---- a/docs/misc/grant-tables.txt
-+++ b/docs/misc/grant-tables.txt
-@@ -87,7 +87,8 @@ is complete.
-                                inconsistent grant table state such as current
-                                version, partially initialized active table pages,
-                                etc.
-- grant_table->maptrack_lock   : spinlock used to protect the maptrack free list
-+ grant_table->maptrack_lock   : spinlock used to protect the maptrack limit
-+ v->maptrack_freelist_lock    : spinlock used to protect the maptrack free list
-  active_grant_entry->lock     : spinlock used to serialize modifications to
-                                active entries
- 
-@@ -102,6 +103,10 @@ is complete.
-  The maptrack free list is protected by its own spinlock.  The maptrack
-  lock may be locked while holding the grant table lock.
- 
-+ The maptrack_freelist_lock is an innermost lock.  It may be locked
-+ while holding other locks, but no other locks may be acquired within
-+ it.
-+
-  Active entries are obtained by calling active_entry_acquire(gt, ref).
-  This function returns a pointer to the active entry after locking its
-  spinlock.  The caller must hold the grant table read lock before
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index 03de2be..983cee2 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -304,11 +304,16 @@ __get_maptrack_handle(
- {
-     unsigned int head, next, prev_head;
- 
-+    spin_lock(&v->maptrack_freelist_lock);
-+
-     do {
-         /* No maptrack pages allocated for this VCPU yet? */
-         head = read_atomic(&v->maptrack_head);
-         if ( unlikely(head == MAPTRACK_TAIL) )
-+        {
-+            spin_unlock(&v->maptrack_freelist_lock);
-             return -1;
-+        }
- 
-         /*
-          * Always keep one entry in the free list to make it easier to
-          */
-         next = read_atomic(&maptrack_entry(t, head).ref);
-         if ( unlikely(next == MAPTRACK_TAIL) )
-+        {
-+            spin_unlock(&v->maptrack_freelist_lock);
-             return -1;
-+        }
- 
-         prev_head = head;
-         head = cmpxchg(&v->maptrack_head, prev_head, next);
-     } while ( head != prev_head );
- 
-+    spin_unlock(&v->maptrack_freelist_lock);
-+
-     return head;
- }
- 
-@@ -380,6 +390,8 @@ put_maptrack_handle(
-     /* 2. Add entry to the tail of the list on the original VCPU. */
-     v = currd->vcpu[maptrack_entry(t, handle).vcpu];
- 
-+    spin_lock(&v->maptrack_freelist_lock);
-+
-     cur_tail = read_atomic(&v->maptrack_tail);
-     do {
-         prev_tail = cur_tail;
-@@ -388,6 +400,8 @@
- 
-     /* 3. Update the old tail entry to point to the new entry. */
-     write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
-+
-+    spin_unlock(&v->maptrack_freelist_lock);
- }
- 
- static inline int
-@@ -411,10 +425,6 @@ get_maptrack_handle(
-      */
-     if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
-     {
--        /*
--         * Can drop the lock since no other VCPU can be adding a new
--         * frame once they've run out.
--         */
-         spin_unlock(&lgt->maptrack_lock);
- 
-         /*
-@@ -426,8 +436,12 @@ get_maptrack_handle(
-         handle = steal_maptrack_handle(lgt, curr);
-         if ( handle == -1 )
-             return -1;
-+        spin_lock(&curr->maptrack_freelist_lock);
-+        maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
-         curr->maptrack_tail = handle;
--        write_atomic(&curr->maptrack_head, handle);
-+        if ( curr->maptrack_head == MAPTRACK_TAIL )
-+            write_atomic(&curr->maptrack_head, handle);
-+        spin_unlock(&curr->maptrack_freelist_lock);
-     }
-     return steal_maptrack_handle(lgt, curr);
- }
-@@ -460,12 +474,15 @@ get_maptrack_handle(
-     smp_wmb();
-     lgt->maptrack_limit += MAPTRACK_PER_PAGE;
- 
-+    spin_unlock(&lgt->maptrack_lock);
-+    spin_lock(&curr->maptrack_freelist_lock);
-+
-     do {
-         new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
-         head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
-     } while ( head != new_mt[i - 1].ref );
- 
--    spin_unlock(&lgt->maptrack_lock);
-+    spin_unlock(&curr->maptrack_freelist_lock);
- 
-     return handle;
- }
-@@ -3473,6 +3490,7 @@ grant_table_destroy(
- 
- void grant_table_init_vcpu(struct vcpu *v)
- {
-+    spin_lock_init(&v->maptrack_freelist_lock);
-     v->maptrack_head = MAPTRACK_TAIL;
-     v->maptrack_tail = MAPTRACK_TAIL;
- }
-diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
-index 4e77899..100f2b3 100644
---- a/xen/include/xen/grant_table.h
-+++ b/xen/include/xen/grant_table.h
-@@ -78,7 +78,7 @@ struct grant_table {
-     /* Mapping tracking table per vcpu. */
-     struct grant_mapping **maptrack;
-     unsigned int maptrack_limit;
--    /* Lock protecting the maptrack page list, head, and limit */
-+    /* Lock protecting the maptrack limit */
-     spinlock_t maptrack_lock;
-     /* The defined versions are 1 and 2.  Set to 0 if we don't know
-        what version to use yet. */
-diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
-index 1127ca9..eedea39 100644
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -230,6 +230,7 @@ struct vcpu
-     int              controller_pause_count;
- 
-     /* Grant table map tracking. */
-+    spinlock_t       maptrack_freelist_lock;
-     unsigned int     maptrack_head;
-     unsigned int     maptrack_tail;
- 
--- 
-2.7.4
-
diff -Nru xen-4.9.0/debian/patches/xsa-230-4.9.y.patch xen-4.9.2/debian/patches/xsa-230-4.9.y.patch
--- xen-4.9.0/debian/patches/xsa-230-4.9.y.patch 2017-08-16 12:20:48.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa-230-4.9.y.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,51 +0,0 @@
-From afc5ebfb5d9623d5fe26fa8b3b57721d1e003fcc Mon Sep 17 00:00:00 2001
-From: Jan Beulich
-Date: Tue, 15 Aug 2017 15:14:36 +0200
-Subject: [PATCH] gnttab: correct pin status fixup for copy
-
-Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev*
-also need to be taken into account when deciding whether to clear
-_GTF_{read,writ}ing.  At least for consistency with code elsewhere the
-read part better doesn't use any mask at all.
-
-This is XSA-230.
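
Note: the XSA-230 reasoning above is pure bitmask accounting: the "writing" flag may only be cleared once no writable pin of either kind (host or device) remains, and the "reading" flag only once no pin at all remains. A minimal C illustration follows; the mask values (HSTW_MASK, DEVW_MASK) and flag bits are invented stand-ins for Xen's GNTPIN_*/GTF_* definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define HSTW_MASK 0x00ffu  /* assumed: host-writable pin count field */
    #define DEVW_MASK 0xff00u  /* assumed: device-writable pin count field */

    #define GTF_writing 0x1u
    #define GTF_reading 0x2u

    static unsigned int flags = GTF_writing | GTF_reading;

    static void fixup(uint32_t pin)
    {
        /* The buggy variant checked only HSTW_MASK, so a remaining
         * device-writable pin silently lost its write protection. */
        if ( !(pin & (HSTW_MASK | DEVW_MASK)) )
            flags &= ~GTF_writing;
        if ( !pin )                /* any remaining pin keeps "reading" set */
            flags &= ~GTF_reading;
    }

    int main(void)
    {
        fixup(0x0100);             /* one device-writable pin still present */
        printf("flags=%#x (writing must still be set)\n", flags);
        return 0;
    }
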
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-master commit: 6e2a4c73564ab907b732059adb317d6ca2d138a2
-master date: 2017-08-15 15:08:03 +0200
-
-Signed-off-by: Stefan Bader
----
- xen/common/grant_table.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index 983cee2..26d7fb7 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -2122,10 +2122,10 @@ __release_grant_for_copy(
- static void __fixup_status_for_copy_pin(const struct active_grant_entry *act,
-                                         uint16_t *status)
- {
--    if ( !(act->pin & GNTPIN_hstw_mask) )
-+    if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
-         gnttab_clear_flag(_GTF_writing, status);
- 
--    if ( !(act->pin & GNTPIN_hstr_mask) )
-+    if ( !act->pin )
-         gnttab_clear_flag(_GTF_reading, status);
- }
- 
-@@ -2333,7 +2333,7 @@ __acquire_grant_for_copy(
- 
-  unlock_out_clear:
-     if ( !(readonly) &&
--         !(act->pin & GNTPIN_hstw_mask) )
-+         !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
-         gnttab_clear_flag(_GTF_writing, status);
- 
-     if ( !act->pin )
--- 
-2.7.4
-
diff -Nru xen-4.9.0/debian/patches/xsa231-4.9.patch xen-4.9.2/debian/patches/xsa231-4.9.patch
--- xen-4.9.0/debian/patches/xsa231-4.9.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa231-4.9.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,108 +0,0 @@
-From: George Dunlap
-Subject: xen/mm: make sure node is less than MAX_NUMNODES
-
-The output of MEMF_get_node(memflags) can be as large as nodeid_t can
-hold (currently 255).  This is then used as an index to arrays of size
-MAX_NUMNODE, which is 64 on x86 and 1 on ARM, can be passed in by an
-untrusted guest (via memory_exchange and increase_reservation) and is
-not currently bounds-checked.
-
-Check the value in page_alloc.c before using it, and also check the
-value in the hypercall call sites and return -EINVAL if appropriate.
-Don't permit domains other than the hardware or control domain to
-allocate node-constrained memory.
-
-This is XSA-231.
-
-Reported-by: Matthew Daley
-Signed-off-by: George Dunlap
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -411,6 +411,31 @@ static void decrease_reservation(struct
-     a->nr_done = i;
- }
- 
-+static bool propagate_node(unsigned int xmf, unsigned int *memflags)
-+{
-+    const struct domain *currd = current->domain;
-+
-+    BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE);
-+    BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE);
-+
-+    if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE )
-+        return true;
-+
-+    if ( is_hardware_domain(currd) || is_control_domain(currd) )
-+    {
-+        if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES )
-+            return false;
-+
-+        *memflags |= MEMF_node(XENMEMF_get_node(xmf));
-+        if ( xmf & XENMEMF_exact_node_request )
-+            *memflags |= MEMF_exact_node;
-+    }
-+    else if ( xmf & XENMEMF_exact_node_request )
-+        return false;
-+
-+    return true;
-+}
-+
- static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
- {
-     struct xen_memory_exchange exch;
-@@ -483,6 +508,12 @@ static long memory_exchange(XEN_GUEST_HA
-         }
-     }
- 
-+    if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) )
-+    {
-+        rc = -EINVAL;
-+        goto fail_early;
-+    }
-+
-     d = rcu_lock_domain_by_any_id(exch.in.domid);
-     if ( d == NULL )
-     {
-@@ -501,7 +532,6 @@ static long memory_exchange(XEN_GUEST_HA
-                   d,
-                   XENMEMF_get_address_bits(exch.out.mem_flags) ? :
-                   (BITS_PER_LONG+PAGE_SHIFT)));
--    memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags));
- 
-     for ( i = (exch.nr_exchanged >> in_chunk_order);
-           i < (exch.in.nr_extents >> in_chunk_order);
-@@ -864,12 +894,8 @@ static int construct_memop_from_reservat
-         }
-         read_unlock(&d->vnuma_rwlock);
-     }
--    else
--    {
--        a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags));
--        if ( r->mem_flags & XENMEMF_exact_node_request )
--            a->memflags |= MEMF_exact_node;
--    }
-+    else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) )
-+        return -EINVAL;
- 
-     return 0;
- }
---- a/xen/common/page_alloc.c
-+++ b/xen/common/page_alloc.c
-@@ -706,9 +706,13 @@ static struct page_info *alloc_heap_page
-         if ( node >= MAX_NUMNODES )
-             node = cpu_to_node(smp_processor_id());
-     }
-+    else if ( unlikely(node >= MAX_NUMNODES) )
-+    {
-+        ASSERT_UNREACHABLE();
-+        return NULL;
-+    }
-     first_node = node;
- 
--    ASSERT(node < MAX_NUMNODES);
-     ASSERT(zone_lo <= zone_hi);
-     ASSERT(zone_hi < NR_ZONES);
- 
diff -Nru xen-4.9.0/debian/patches/xsa232.patch xen-4.9.2/debian/patches/xsa232.patch
--- xen-4.9.0/debian/patches/xsa232.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa232.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,23 +0,0 @@
-From: Andrew Cooper
-Subject: grant_table: fix GNTTABOP_cache_flush handling
-
-Don't fall over a NULL grant_table pointer when the owner of the domain
-is a system domain (DOMID_{XEN,IO} etc).
-
-This is XSA-232.
-
-Reported-by: Matthew Daley
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -3053,7 +3053,7 @@ static int cache_flush(gnttab_cache_flus
- 
-     page = mfn_to_page(mfn);
-     owner = page_get_owner_and_reference(page);
--    if ( !owner )
-+    if ( !owner || !owner->grant_table )
-     {
-         rcu_unlock_domain(d);
-         return -EPERM;
diff -Nru xen-4.9.0/debian/patches/xsa233.patch xen-4.9.2/debian/patches/xsa233.patch
--- xen-4.9.0/debian/patches/xsa233.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa233.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,52 +0,0 @@
-From: Juergen Gross
-Subject: tools/xenstore: dont unlink connection object twice
-
-A connection object of a domain with associated stubdom has two
-parents: the domain and the stubdom.  When cleaning up the list of
-active domains in domain_cleanup() make sure not to unlink the
-connection twice from the same domain.  This could happen when the
-domain and its stubdom are being destroyed at the same time leading
-to the domain loop being entered twice.
-
-Additionally don't use talloc_free() in this case as it will remove
-a random parent link, leading eventually to a memory leak.  Use
-talloc_unlink() instead specifying the context from which the
-connection object should be removed.
-
-This is XSA-233.
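
Note: the XSA-233 description above is really about detaching a multi-parent object from one *specific* owner rather than from whichever owner the allocator happens to pick. The sketch below uses hypothetical helpers (obj_get()/obj_unlink()), deliberately not the real talloc API, to show why an explicit owner argument avoids both the double unlink and the leak:

    #include <stdio.h>
    #include <stdlib.h>

    struct obj { int refs; };

    static struct obj *obj_get(struct obj *o) { o->refs++; return o; }

    /* Detach one named owner; destroy only when the last link goes away. */
    static void obj_unlink(struct obj *o, const char *owner)
    {
        if ( --o->refs == 0 )
        {
            printf("last link (%s) gone, destroying\n", owner);
            free(o);
        }
        else
            printf("unlinked from %s, %d link(s) remain\n", owner, o->refs);
    }

    int main(void)
    {
        struct obj *conn = calloc(1, sizeof(*conn));

        obj_get(conn);                /* parent 1: the domain */
        obj_get(conn);                /* parent 2: the stubdom */

        obj_unlink(conn, "domain");   /* explicit owner: repeatable, no leak */
        obj_unlink(conn, "stubdom");
        return 0;
    }

Clearing the stored pointer after the unlink, as the patch does with domain->conn, is what makes a second pass over the same domain harmless.
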
-
-Reported-by: Eric Chanudet
-Signed-off-by: Juergen Gross
-Reviewed-by: Ian Jackson
-
---- a/tools/xenstore/xenstored_domain.c
-+++ b/tools/xenstore/xenstored_domain.c
-@@ -221,10 +221,11 @@ static int destroy_domain(void *_domain)
- static void domain_cleanup(void)
- {
- 	xc_dominfo_t dominfo;
--	struct domain *domain, *tmp;
-+	struct domain *domain;
- 	int notify = 0;
- 
--	list_for_each_entry_safe(domain, tmp, &domains, list) {
-+ again:
-+	list_for_each_entry(domain, &domains, list) {
- 		if (xc_domain_getinfo(*xc_handle, domain->domid, 1,
- 				      &dominfo) == 1 &&
- 		    dominfo.domid == domain->domid) {
-@@ -236,8 +237,12 @@ static void domain_cleanup(void)
- 			if (!dominfo.dying)
- 				continue;
- 		}
--		talloc_free(domain->conn);
--		notify = 0; /* destroy_domain() fires the watch */
-+		if (domain->conn) {
-+			talloc_unlink(talloc_autofree_context(), domain->conn);
-+			domain->conn = NULL;
-+			notify = 0; /* destroy_domain() fires the watch */
-+			goto again;
-+		}
- 	}
- 
- 	if (notify)
diff -Nru xen-4.9.0/debian/patches/xsa234-4.9.patch xen-4.9.2/debian/patches/xsa234-4.9.patch
--- xen-4.9.0/debian/patches/xsa234-4.9.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa234-4.9.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,192 +0,0 @@
-From: Jan Beulich
-Subject: gnttab: also validate PTE permissions upon destroy/replace
-
-In order for PTE handling to match up with the reference counting done
-by common code, presence and writability of grant mapping PTEs must
-also be taken into account; validating just the frame number is not
-enough.  This is in particular relevant if a guest fiddles with grant
-PTEs via non-grant hypercalls.
-
-Note that the flags being passed to replace_grant_host_mapping()
-already happen to be those of the existing mapping, so no new function
-parameter is needed.
-
-This is XSA-234.
-
-Reported-by: Andrew Cooper
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4058,7 +4058,8 @@ static int create_grant_pte_mapping(
- }
- 
- static int destroy_grant_pte_mapping(
--    uint64_t addr, unsigned long frame, struct domain *d)
-+    uint64_t addr, unsigned long frame, unsigned int grant_pte_flags,
-+    struct domain *d)
- {
-     int rc = GNTST_okay;
-     void *va;
-@@ -4104,17 +4105,29 @@ static int destroy_grant_pte_mapping(
- 
-     ol1e = *(l1_pgentry_t *)va;
- 
--    /* Check that the virtual address supplied is actually mapped to frame. */
--    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
-+    /*
-+     * Check that the PTE supplied actually maps frame (with appropriate
-+     * permissions).
-+     */
-+    if ( unlikely(l1e_get_pfn(ol1e) != frame) ||
-+         unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
-+                  (_PAGE_PRESENT | _PAGE_RW)) )
-     {
-         page_unlock(page);
--        gdprintk(XENLOG_WARNING,
--                 "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n",
--                 l1e_get_intpte(ol1e), addr, frame);
-+        gdprintk(XENLOG_ERR,
-+                 "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n",
-+                 l1e_get_intpte(ol1e), addr,
-+                 l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags)));
-         rc = GNTST_general_error;
-         goto failed;
-     }
- 
-+    if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
-+                  ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) )
-+        gdprintk(XENLOG_WARNING,
-+                 "PTE flags %x at %"PRIx64" don't match grant (%x)\n",
-+                 l1e_get_flags(ol1e), addr, grant_pte_flags);
-+
-     /* Delete pagetable entry. */
-     if ( unlikely(!UPDATE_ENTRY
-                   (l1,
-@@ -4123,7 +4136,8 @@ static int destroy_grant_pte_mapping(
-                    0)) )
-     {
-         page_unlock(page);
--        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va);
-+        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n",
-+                 addr);
-         rc = GNTST_general_error;
-         goto failed;
-     }
-@@ -4191,7 +4205,8 @@ static int create_grant_va_mapping(
- }
- 
- static int replace_grant_va_mapping(
--    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v)
-+    unsigned long addr, unsigned long frame, unsigned int grant_pte_flags,
-+    l1_pgentry_t nl1e, struct vcpu *v)
- {
-     l1_pgentry_t *pl1e, ol1e;
-     unsigned long gl1mfn;
-@@ -4227,20 +4242,33 @@ replace_grant_va_mapping(
- 
-     ol1e = *pl1e;
- 
--    /* Check that the virtual address supplied is actually mapped to frame. */
--    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
--    {
--        gdprintk(XENLOG_WARNING,
--                 "PTE entry %lx for address %lx doesn't match frame %lx\n",
--                 l1e_get_pfn(ol1e), addr, frame);
-+    /*
-+     * Check that the virtual address supplied is actually mapped to frame
-+     * (with appropriate permissions).
-+     */
-+    if ( unlikely(l1e_get_pfn(ol1e) != frame) ||
-+         unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
-+                  (_PAGE_PRESENT | _PAGE_RW)) )
-+    {
-+        gdprintk(XENLOG_ERR,
-+                 "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n",
-+                 l1e_get_intpte(ol1e), addr,
-+                 l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags)));
-         rc = GNTST_general_error;
-         goto unlock_and_out;
-     }
- 
-+    if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
-+                  ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) )
-+        gdprintk(XENLOG_WARNING,
-+                 "PTE flags %x for %"PRIx64" don't match grant (%x)\n",
-+                 l1e_get_flags(ol1e), addr, grant_pte_flags);
-+
-     /* Delete pagetable entry. */
-     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
-     {
--        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e);
-+        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n",
-+                 addr);
-         rc = GNTST_general_error;
-         goto unlock_and_out;
-     }
-@@ -4254,9 +4282,11 @@ static int replace_grant_va_mapping(
- }
- 
- static int destroy_grant_va_mapping(
--    unsigned long addr, unsigned long frame, struct vcpu *v)
-+    unsigned long addr, unsigned long frame, unsigned int grant_pte_flags,
-+    struct vcpu *v)
- {
--    return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
-+    return replace_grant_va_mapping(addr, frame, grant_pte_flags,
-+                                    l1e_empty(), v);
- }
- 
- static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
-@@ -4351,20 +4381,39 @@ int replace_grant_host_mapping(
-     unsigned long gl1mfn;
-     struct page_info *l1pg;
-     int rc;
-+    unsigned int grant_pte_flags;
- 
-     if ( paging_mode_external(current->domain) )
-         return replace_grant_p2m_mapping(addr, frame, new_addr, flags);
- 
-+    grant_pte_flags =
-+        _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX;
-+
-+    if ( flags & GNTMAP_application_map )
-+        grant_pte_flags |= _PAGE_USER;
-+    if ( !(flags & GNTMAP_readonly) )
-+        grant_pte_flags |= _PAGE_RW;
-+    /*
-+     * On top of the explicit settings done by create_grant_host_mapping()
-+     * also open-code relevant parts of adjust_guest_l1e().  Don't mirror
-+     * available and cachability flags, though.
-+     */
-+    if ( !is_pv_32bit_domain(curr->domain) )
-+        grant_pte_flags |= (grant_pte_flags & _PAGE_USER)
-+                           ? _PAGE_GLOBAL
-+                           : _PAGE_GUEST_KERNEL | _PAGE_USER;
-+
-     if ( flags & GNTMAP_contains_pte )
-     {
-         if ( !new_addr )
--            return destroy_grant_pte_mapping(addr, frame, curr->domain);
-+            return destroy_grant_pte_mapping(addr, frame, grant_pte_flags,
-+                                             curr->domain);
- 
-         return GNTST_general_error;
-     }
- 
-     if ( !new_addr )
--        return destroy_grant_va_mapping(addr, frame, curr);
-+        return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr);
- 
-     pl1e = guest_map_l1e(new_addr, &gl1mfn);
-     if ( !pl1e )
-@@ -4412,7 +4461,7 @@ int replace_grant_host_mapping(
-     put_page(l1pg);
-     guest_unmap_l1e(pl1e);
- 
--    rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
-+    rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr);
-     if ( rc && !paging_mode_refcounts(curr->domain) )
-         put_page_from_l1e(ol1e, curr->domain);
- 
diff -Nru xen-4.9.0/debian/patches/xsa235-4.9.patch xen-4.9.2/debian/patches/xsa235-4.9.patch
--- xen-4.9.0/debian/patches/xsa235-4.9.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa235-4.9.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,49 +0,0 @@
-From: Jan Beulich
-Subject: arm/mm: release grant lock on xenmem_add_to_physmap_one() error paths
-
-Commit 55021ff9ab ("xen/arm: add_to_physmap_one: Avoid to map mfn 0 if
-an error occurs") introduced error paths not releasing the grant table
-lock.  Replace them by a suitable check after the lock was dropped.
-
-This is XSA-235.
-
-Reported-by: Wei Liu
-Signed-off-by: Jan Beulich
-Reviewed-by: Julien Grall
-
---- a/xen/arch/arm/mm.c
-+++ b/xen/arch/arm/mm.c
-@@ -1164,7 +1164,7 @@ int xenmem_add_to_physmap_one(
-             if ( idx < nr_status_frames(d->grant_table) )
-                 mfn = virt_to_mfn(d->grant_table->status[idx]);
-             else
--                return -EINVAL;
-+                mfn = mfn_x(INVALID_MFN);
-         }
-         else
-         {
-@@ -1175,14 +1175,21 @@ int xenmem_add_to_physmap_one(
-             if ( idx < nr_grant_frames(d->grant_table) )
-                 mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
-             else
--                return -EINVAL;
-+                mfn = mfn_x(INVALID_MFN);
-         }
- 
--        d->arch.grant_table_gfn[idx] = gfn;
-+        if ( mfn != mfn_x(INVALID_MFN) )
-+        {
-+            d->arch.grant_table_gfn[idx] = gfn;
- 
--        t = p2m_ram_rw;
-+            t = p2m_ram_rw;
-+        }
- 
-         grant_write_unlock(d->grant_table);
-+
-+        if ( mfn == mfn_x(INVALID_MFN) )
-+            return -EINVAL;
-+
-         break;
-     case XENMAPSPACE_shared_info:
-         if ( idx != 0 )
diff -Nru xen-4.9.0/debian/patches/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch xen-4.9.2/debian/patches/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch
--- xen-4.9.0/debian/patches/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,26 +0,0 @@
-From: Jan Beulich
-Subject: x86: don't allow MSI pIRQ mapping on unowned device
-
-MSI setup should be permitted only for existing devices owned by the
-respective guest (the operation may still be carried out by the domain
-controlling that guest).
-
-This is part of XSA-237.
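
Note: the ownership check described above reduces to resolving the device *within the requesting domain*, so an unowned device simply fails the lookup before any MSI setup runs. In the sketch below, the types and device table are hypothetical; only the shape of the lookup mirrors what the patch does with pci_get_pdev_by_domain():

    #include <stdio.h>

    struct domain { int id; };
    struct pdev   { int owner_id; };

    static struct pdev devices[] = { { .owner_id = 1 }, { .owner_id = 2 } };

    /* Lookup that filters by owner: an unowned device is indistinguishable
     * from a nonexistent one, so the caller just bails out. */
    static struct pdev *get_pdev_by_domain(const struct domain *d, int idx)
    {
        struct pdev *pdev = &devices[idx];

        return pdev->owner_id == d->id ? pdev : NULL;
    }

    int main(void)
    {
        struct domain d = { .id = 1 };

        printf("own device:     %p\n", (void *)get_pdev_by_domain(&d, 0));
        printf("foreign device: %p\n", (void *)get_pdev_by_domain(&d, 1)); /* NULL */
        return 0;
    }
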
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -1963,7 +1963,10 @@ int map_domain_pirq(
-         if ( !cpu_has_apic )
-             goto done;
- 
--        pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
-+        pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
-+        if ( !pdev )
-+            goto done;
-+
-         ret = pci_enable_msi(msi, &msi_desc);
-         if ( ret )
-         {
diff -Nru xen-4.9.0/debian/patches/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch xen-4.9.2/debian/patches/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch
--- xen-4.9.0/debian/patches/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,65 +0,0 @@
-From: Jan Beulich
-Subject: x86: enforce proper privilege when (un)mapping pIRQ-s
-
-(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE*
-actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET.
-This in turn requires bypassing the XSM check in physdev_unmap_pirq()
-for the HVM emuirq case just like is being done in physdev_map_pirq().
-The primary goal security wise, however, is to no longer allow HVM
-guests, by specifying their own domain ID instead of DOMID_SELF, to
-enter code paths intended for PV guest and the control domains of HVM
-guests only.
-
-This is part of XSA-237.
-
-Signed-off-by: Jan Beulich
-Reviewed-by: George Dunlap
-
---- a/xen/arch/x86/physdev.c
-+++ b/xen/arch/x86/physdev.c
-@@ -111,7 +111,7 @@ int physdev_map_pirq(domid_t domid, int
-     if ( d == NULL )
-         return -ESRCH;
- 
--    ret = xsm_map_domain_pirq(XSM_TARGET, d);
-+    ret = xsm_map_domain_pirq(XSM_DM_PRIV, d);
-     if ( ret )
-         goto free_domain;
- 
-@@ -256,13 +256,14 @@ int physdev_map_pirq(domid_t domid, int
- int physdev_unmap_pirq(domid_t domid, int pirq)
- {
-     struct domain *d;
--    int ret;
-+    int ret = 0;
- 
-     d = rcu_lock_domain_by_any_id(domid);
-     if ( d == NULL )
-         return -ESRCH;
- 
--    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
-+    if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) )
-+        ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d);
-     if ( ret )
-         goto free_domain;
- 
---- a/xen/include/xsm/dummy.h
-+++ b/xen/include/xsm/dummy.h
-@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid
- 
- static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
- {
--    XSM_ASSERT_ACTION(XSM_TARGET);
-+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
-     return xsm_default_action(action, current->domain, d);
- }
- 
-@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq
- 
- static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
- {
--    XSM_ASSERT_ACTION(XSM_TARGET);
-+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
-     return xsm_default_action(action, current->domain, d);
- }
- 
diff -Nru xen-4.9.0/debian/patches/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch xen-4.9.2/debian/patches/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch
--- xen-4.9.0/debian/patches/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,54 +0,0 @@
-From: Jan Beulich
-Subject: x86/MSI: disallow redundant enabling
-
-At the moment, Xen attempts to allow redundant enabling of MSI by
-having pci_enable_msi() return 0, and point to the existing MSI
-descriptor, when the msi already exists.
-
-Unfortunately, if subsequent errors are encountered, the cleanup
-paths assume pci_enable_msi() had done full initialization, and
-hence undo everything that was assumed to be done by that
-function without also undoing other setup that would normally
-occur only after that function was called (in map_domain_pirq()
-itself).
-
-Rather than try to make the redundant enabling case work properly, just
-forbid it entirely by having pci_enable_msi() return -EEXIST when MSI
-is already set up.
-
-This is part of XSA-237.
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-Reviewed-by: George Dunlap
-
---- a/xen/arch/x86/msi.c
-+++ b/xen/arch/x86/msi.c
-@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i
-     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
-     if ( old_desc )
-     {
--        printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
-+        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
-                msi->irq, msi->seg, msi->bus,
-                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
--        *desc = old_desc;
--        return 0;
-+        return -EEXIST;
-     }
- 
-     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
-@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_
-     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
-     if ( old_desc )
-     {
--        printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
-+        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
-                msi->irq, msi->seg, msi->bus,
-                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
--        *desc = old_desc;
--        return 0;
-+        return -EEXIST;
-     }
- 
-     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
diff -Nru xen-4.9.0/debian/patches/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch xen-4.9.2/debian/patches/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch
--- xen-4.9.0/debian/patches/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,123 +0,0 @@
-From: Jan Beulich
-Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths
-
-Mappings that had been set up before should not be torn down when
-handling unrelated errors.
-
-This is part of XSA-237.
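
Note: the "preserve on unrelated error" rule above amounts to recording which entries the current call actually set up (the patch does this with a DECLARE_BITMAP named "prepared") and undoing only those on failure. A self-contained C sketch with invented names follows:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR 4

    static bool mapped[NR];              /* pre-existing global state */

    static int do_map(int fail_at)
    {
        bool prepared[NR] = { false };   /* what *this* call created */
        int i;

        for ( i = 0; i < NR; i++ )
        {
            if ( i == fail_at )
                goto error;
            if ( !mapped[i] )
            {
                mapped[i] = true;
                prepared[i] = true;
            }
        }
        return 0;

     error:
        /* Tear down only our own work; older mappings survive the error. */
        while ( i-- )
            if ( prepared[i] )
                mapped[i] = false;
        return -1;
    }

    int main(void)
    {
        mapped[0] = true;                /* set up by an earlier call */
        do_map(2);
        printf("mapped[0]=%d (must still be 1)\n", mapped[0]);
        return 0;
    }
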
-
-Signed-off-by: Jan Beulich
-Reviewed-by: George Dunlap
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -1251,7 +1251,8 @@ static int prepare_domain_irq_pirq(struc
-         return -ENOMEM;
-     }
-     *pinfo = info;
--    return 0;
-+
-+    return !!err;
- }
- 
- static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
-@@ -1294,7 +1295,10 @@ int init_domain_irq_mapping(struct domai
-             continue;
-         err = prepare_domain_irq_pirq(d, i, i, &info);
-         if ( err )
-+        {
-+            ASSERT(err < 0);
-             break;
-+        }
-         set_domain_irq_pirq(d, i, info);
-     }
- 
-@@ -1902,6 +1906,7 @@ int map_domain_pirq(
-     struct pirq *info;
-     struct irq_desc *desc;
-     unsigned long flags;
-+    DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
- 
-     ASSERT(spin_is_locked(&d->event_lock));
- 
-@@ -1945,8 +1950,10 @@ int map_domain_pirq(
-     }
- 
-     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
--    if ( ret )
-+    if ( ret < 0 )
-         goto revoke;
-+    if ( !ret )
-+        __set_bit(0, prepared);
- 
-     desc = irq_to_desc(irq);
- 
-@@ -2018,8 +2025,10 @@ int map_domain_pirq(
-             irq = create_irq(NUMA_NO_NODE);
-             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
-                            : irq;
--            if ( ret )
-+            if ( ret < 0 )
-                 break;
-+            if ( !ret )
-+                __set_bit(nr, prepared);
-             msi_desc[nr].irq = irq;
- 
-             if ( irq_permit_access(d, irq) != 0 )
-@@ -2052,15 +2061,15 @@ int map_domain_pirq(
-                 desc->msi_desc = NULL;
-                 spin_unlock_irqrestore(&desc->lock, flags);
-             }
--            while ( nr-- )
-+            while ( nr )
-             {
-                 if ( irq >= 0 && irq_deny_access(d, irq) )
-                     printk(XENLOG_G_ERR
-                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
-                            d->domain_id, irq, pirq);
--                if ( info )
-+                if ( info && test_bit(nr, prepared) )
-                     cleanup_domain_irq_pirq(d, irq, info);
--                info = pirq_info(d, pirq + nr);
-+                info = pirq_info(d, pirq + --nr);
-                 irq = info->arch.irq;
-             }
-             msi_desc->irq = -1;
-@@ -2076,12 +2085,14 @@ int map_domain_pirq(
-         spin_lock_irqsave(&desc->lock, flags);
-         set_domain_irq_pirq(d, irq, info);
-         spin_unlock_irqrestore(&desc->lock, flags);
-+        ret = 0;
-     }
- 
-  done:
-     if ( ret )
-     {
--        cleanup_domain_irq_pirq(d, irq, info);
-+        if ( test_bit(0, prepared) )
-+            cleanup_domain_irq_pirq(d, irq, info);
-  revoke:
-         if ( irq_deny_access(d, irq) )
-             printk(XENLOG_G_ERR
---- a/xen/arch/x86/physdev.c
-+++ b/xen/arch/x86/physdev.c
-@@ -186,7 +186,7 @@ int physdev_map_pirq(domid_t domid, int
-     }
-     else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
-     {
--        if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
-+        if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS )
-             ret = -EDOM;
-         else if ( msi->entry_nr != 1 && !iommu_intremap )
-             ret = -EOPNOTSUPP;
---- a/xen/include/asm-x86/msi.h
-+++ b/xen/include/asm-x86/msi.h
-@@ -56,6 +56,8 @@
- /* MAX fixed pages reserved for mapping MSIX tables. */
- #define FIX_MSIX_MAX_PAGES              512
- 
-+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
-+
- struct msi_info {
-     u16 seg;
-     u8 bus;
diff -Nru xen-4.9.0/debian/patches/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch xen-4.9.2/debian/patches/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch
--- xen-4.9.0/debian/patches/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,37 +0,0 @@
-From: Jan Beulich
-Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook
-
-The caller and the FLASK implementation of xsm_unmap_domain_irq()
-disagreed about what the "data" argument points to in the MSI case:
-Change both sides to pass/take a PCI device.
-
-This is part of XSA-237.
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -2143,7 +2143,8 @@ int unmap_domain_pirq(struct domain *d,
-         nr = msi_desc->msi.nvec;
-     }
- 
--    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
-+    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
-+                               msi_desc ? msi_desc->dev : NULL);
-     if ( ret )
-         goto done;
- 
---- a/xen/xsm/flask/hooks.c
-+++ b/xen/xsm/flask/hooks.c
-@@ -918,8 +918,8 @@ static int flask_unmap_domain_msi (struc
-                                    u32 *sid, struct avc_audit_data *ad)
- {
- #ifdef CONFIG_HAS_PCI
--    struct msi_info *msi = data;
--    u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn;
-+    const struct pci_dev *pdev = data;
-+    u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn;
- 
-     AVC_AUDIT_DATA_INIT(ad, DEV);
-     ad->device = machine_bdf;
diff -Nru xen-4.9.0/debian/patches/xsa238.patch xen-4.9.2/debian/patches/xsa238.patch
--- xen-4.9.0/debian/patches/xsa238.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa238.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,44 +0,0 @@
-From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001
-From: XenProject Security Team
-Date: Tue, 5 Sep 2017 13:41:37 +0200
-Subject: x86/ioreq server: correctly handle bogus
- XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments
-
-Misbehaving device model can pass incorrect XEN_DMOP_map/
-unmap_io_range_to_ioreq_server arguments, namely end < start when
-specifying address range.  When this happens we hit ASSERT(s <= e) in
-rangeset_contains_range()/rangeset_overlaps_range() with debug builds.
-Production builds will not trap right away but may misbehave later
-while handling such bogus ranges.
-
-This is XSA-238.
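
Note: the XSA-238 description above reduces to rejecting an inverted, guest-controlled range before it reaches code that asserts start <= end. A minimal sketch follows; the map_io_range() signature is an assumption for illustration, not the real hypervisor interface:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    static int map_io_range(uint64_t start, uint64_t end)
    {
        if ( start > end )
            return -EINVAL;    /* bogus request from the device model */

        /* ... rangeset manipulation would happen here ... */
        return 0;
    }

    int main(void)
    {
        printf("good:  %d\n", map_io_range(0x1000, 0x1fff));
        printf("bogus: %d\n", map_io_range(0x2000, 0x1000));  /* -EINVAL */
        return 0;
    }
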
-
-Reviewed-by: Jan Beulich
----
- xen/arch/x86/hvm/ioreq.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
-index b2a8b0e986..8c8bf1f0ec 100644
---- a/xen/arch/x86/hvm/ioreq.c
-+++ b/xen/arch/x86/hvm/ioreq.c
-@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
-     struct hvm_ioreq_server *s;
-     int rc;
- 
-+    if ( start > end )
-+        return -EINVAL;
-+
-     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
- 
-     rc = -ENOENT;
-@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
-     struct hvm_ioreq_server *s;
-     int rc;
- 
-+    if ( start > end )
-+        return -EINVAL;
-+
-     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
- 
-     rc = -ENOENT;
diff -Nru xen-4.9.0/debian/patches/xsa239.patch xen-4.9.2/debian/patches/xsa239.patch
--- xen-4.9.0/debian/patches/xsa239.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa239.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,44 +0,0 @@
-From: Jan Beulich
-Subject: x86/HVM: prefill partially used variable on emulation paths
-
-Certain handlers ignore the access size (vioapic_write() being the
-example this was found with), perhaps leading to subsequent reads
-seeing data that wasn't actually written by the guest.  For
-consistency and extra safety also do this on the read path of
-hvm_process_io_intercept(), even if this doesn't directly affect what
-guests get to see, as we've supposedly already dealt with read handlers
-leaving data completely unitialized.
-
-This is XSA-239.
-
-Signed-off-by: Jan Beulich
-
---- a/xen/arch/x86/hvm/emulate.c
-+++ b/xen/arch/x86/hvm/emulate.c
-@@ -129,7 +129,7 @@ static int hvmemul_do_io(
-         .count = *reps,
-         .dir = dir,
-         .df = df,
--        .data = data,
-+        .data = data_is_addr ? data : 0,
-         .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
-         .state = STATE_IOREQ_READY,
-     };
---- a/xen/arch/x86/hvm/intercept.c
-+++ b/xen/arch/x86/hvm/intercept.c
-@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc
-             addr = (p->type == IOREQ_TYPE_COPY) ?
-                    p->addr + step * i :
-                    p->addr;
-+            data = 0;
-             rc = ops->read(handler, addr, p->size, &data);
-             if ( rc != X86EMUL_OKAY )
-                 break;
-@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc
-             {
-                 if ( p->data_is_ptr )
-                 {
-+                    data = 0;
-                     switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
-                                                       p->size) )
-                     {
diff -Nru xen-4.9.0/debian/patches/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch xen-4.9.2/debian/patches/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
--- xen-4.9.0/debian/patches/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,493 +0,0 @@
-From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001
-From: Jan Beulich
-Date: Thu, 28 Sep 2017 15:17:25 +0100
-Subject: [PATCH 1/2] x86: limit linear page table use to a single level
-
-That's the only way that they're meant to be used.  Without such a
-restriction arbitrarily long chains of same-level page tables can be
-built, tearing down of which may then cause arbitrarily deep recursion,
-causing a stack overflow.  To facilitate this restriction, a counter is
-being introduced to track both the number of same-level entries in a
-page table as well as the number of uses of a page table in another
-same-level one (counting into positive and negative direction
-respectively, utilizing the fact that both counts can't be non-zero at
-the same time).
-
-Note that the added accounting introduces a restriction on the number
-of times a page can be used in other same-level page tables - more than
-32k of such uses are no longer possible.
-
-Note also that some put_page_and_type[_preemptible]() calls are
-replaced with open-coded equivalents.  This seemed preferrable to
-adding "parent_table" to the matrix of functions.
-
-Note further that cross-domain same-level page table references are no
-longer permitted (they probably never should have been).
-
-This is XSA-240.
-
-Signed-off-by: Jan Beulich
-Signed-off-by: George Dunlap
----
- xen/arch/x86/domain.c        |   1 +
- xen/arch/x86/mm.c            | 171 ++++++++++++++++++++++++++++++++++++++-----
- xen/include/asm-x86/domain.h |   2 +
- xen/include/asm-x86/mm.h     |  25 +++++--
- 4 files changed, 175 insertions(+), 24 deletions(-)
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index d7e699228c..d7ed72c246 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -1226,6 +1226,7 @@ int arch_set_info_guest(
-         rc = -ERESTART;
-         /* Fallthrough */
-     case -ERESTART:
-+        v->arch.old_guest_ptpg = NULL;
-         v->arch.old_guest_table =
-             pagetable_get_page(v->arch.guest_table);
-         v->arch.guest_table = pagetable_null();
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 86f5eda52d..1e469bd354 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -747,6 +747,61 @@ static void put_data_page(
-         put_page(page);
- }
- 
-+static bool inc_linear_entries(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+    do {
-+        /*
-+         * The check below checks for the "linear use" count being non-zero
-+         * as well as overflow.  Signed integer overflow is undefined behavior
-+         * according to the C spec.  However, as long as linear_pt_count is
-+         * smaller in size than 'int', the arithmetic operation of the
-+         * increment below won't overflow; rather the result will be truncated
-+         * when stored.  Ensure that this is always true.
-+         */
-+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+        oc = nc++;
-+        if ( nc <= 0 )
-+            return false;
-+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+    } while ( oc != nc );
-+
-+    return true;
-+}
-+
-+static void dec_linear_entries(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) oc;
-+
-+    oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
-+    ASSERT(oc > 0);
-+}
-+
-+static bool inc_linear_uses(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+    do {
-+        /* See the respective comment in inc_linear_entries(). */
-+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+        oc = nc--;
-+        if ( nc >= 0 )
-+            return false;
-+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+    } while ( oc != nc );
-+
-+    return true;
-+}
-+
-+static void dec_linear_uses(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) oc;
-+
-+    oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
-+    ASSERT(oc < 0);
-+}
-+
- /*
-  * We allow root tables to map each other (a.k.a. linear page tables).  It
-  * needs some special care with reference counts and access permissions:
-@@ -777,15 +832,35 @@ get_##level##_linear_pagetable( \
- \
-     if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \
-     { \
-+        struct page_info *ptpg = mfn_to_page(pde_pfn); \
-+ \
-+        /* Make sure the page table belongs to the correct domain. */ \
-+        if ( unlikely(page_get_owner(ptpg) != d) ) \
-+            return 0; \
-+ \
-         /* Make sure the mapped frame belongs to the correct domain. */ \
-         if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
-             return 0; \
- \
-         /* \
--         * Ensure that the mapped frame is an already-validated page table. \
-+         * Ensure that the mapped frame is an already-validated page table \
-+         * and is not itself having linear entries, as well as that the \
-+         * containing page table is not iself in use as a linear page table \
-+         * elsewhere. \
-          * If so, atomically increment the count (checking for overflow). \
-          */ \
-         page = mfn_to_page(pfn); \
-+        if ( !inc_linear_entries(ptpg) ) \
-+        { \
-+            put_page(page); \
-+            return 0; \
-+        } \
-+        if ( !inc_linear_uses(page) ) \
-+        { \
-+            dec_linear_entries(ptpg); \
-+            put_page(page); \
-+            return 0; \
-+        } \
-         y = page->u.inuse.type_info; \
-         do { \
-             x = y; \
-@@ -793,6 +868,8 @@ get_##level##_linear_pagetable( \
-                  unlikely((x & (PGT_type_mask|PGT_validated)) != \
-                           (PGT_##level##_page_table|PGT_validated)) ) \
-             { \
-+                dec_linear_uses(page); \
-+                dec_linear_entries(ptpg); \
-                 put_page(page); \
-                 return 0; \
-             } \
-@@ -1226,6 +1303,9 @@ get_page_from_l4e(
-             l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
-     } while ( 0 )
- 
-+static int _put_page_type(struct page_info *page, bool preemptible,
-+                          struct page_info *ptpg);
-+
- void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
- {
-     unsigned long     pfn = l1e_get_pfn(l1e);
-@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
-         if ( l2e_get_flags(l2e) & _PAGE_PSE )
-             put_superpage(l2e_get_pfn(l2e));
-         else
--            put_page_and_type(l2e_get_page(l2e));
-+        {
-+            struct page_info *pg = l2e_get_page(l2e);
-+            int rc = _put_page_type(pg, false, mfn_to_page(pfn));
-+
-+            ASSERT(!rc);
-+            put_page(pg);
-+        }
- 
-     return 0;
- }
- 
--static int __put_page_type(struct page_info *, int preemptible);
--
- static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-                              int partial, bool_t defer)
- {
-     struct page_info *pg;
-+    int rc;
- 
-     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
-         return 1;
-@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-     if ( unlikely(partial > 0) )
-     {
-         ASSERT(!defer);
--        return __put_page_type(pg, 1);
-+        return _put_page_type(pg, true, mfn_to_page(pfn));
-     }
- 
-     if ( defer )
-     {
-+        current->arch.old_guest_ptpg = mfn_to_page(pfn);
-         current->arch.old_guest_table = pg;
-         return 0;
-     }
- 
--    return put_page_and_type_preemptible(pg);
-+    rc = _put_page_type(pg, true, mfn_to_page(pfn));
-+    if ( likely(!rc) )
-+        put_page(pg);
-+
-+    return rc;
- }
- 
- static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
-                              int partial, bool_t defer)
- {
-+    int rc = 1;
-+
-     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
-          (l4e_get_pfn(l4e) != pfn) )
-     {
-@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
-         if ( unlikely(partial > 0) )
-         {
-             ASSERT(!defer);
--            return __put_page_type(pg, 1);
-+            return _put_page_type(pg, true, mfn_to_page(pfn));
-         }
- 
-         if ( defer )
-         {
-+            current->arch.old_guest_ptpg = mfn_to_page(pfn);
-             current->arch.old_guest_table = pg;
-             return
0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); - } -- return 1; -+ -+ return rc; - } - - static int alloc_l1_table(struct page_info *page) -@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page) - { - page->nr_validated_ptes = i; - page->partial_pte = 0; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - while ( i-- > 0 ) -@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page) - { - if ( current->arch.old_guest_table ) - page->nr_validated_ptes++; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - } -@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag - } - - --static int __put_final_page_type( -- struct page_info *page, unsigned long type, int preemptible) -+static int _put_final_page_type(struct page_info *page, unsigned long type, -+ bool preemptible, struct page_info *ptpg) - { - int rc = free_page_type(page, type, preemptible); - - /* No need for atomic update of type_info here: noone else updates it. */ - if ( rc == 0 ) - { -+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) -+ { -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); - /* - * Record TLB information for flush later. We do not stamp page tables - * when running in shadow mode: -@@ -2446,8 +2550,8 @@ static int __put_final_page_type( - } - - --static int __put_page_type(struct page_info *page, -- int preemptible) -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg) - { - unsigned long nx, x, y = page->u.inuse.type_info; - int rc = 0; -@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page, - x, nx)) != x) ) - continue; - /* We cleared the 'valid bit' so we do the clean up. */ -- rc = __put_final_page_type(page, x, preemptible); -+ rc = _put_final_page_type(page, x, preemptible, ptpg); -+ ptpg = NULL; - if ( x & PGT_partial ) - put_page(page); - break; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ /* -+ * page_set_tlbflush_timestamp() accesses the same union -+ * linear_pt_count lives in. Unvalidated page table pages, -+ * however, should occur during domain destruction only -+ * anyway. Updating of linear_pt_count luckily is not -+ * necessary anymore for a dying domain. -+ */ -+ ASSERT(page_get_owner(page)->is_dying); -+ ASSERT(page->linear_pt_count < 0); -+ ASSERT(ptpg->linear_pt_count > 0); -+ ptpg = NULL; -+ } -+ - /* - * Record TLB information for flush later. 
We do not stamp page - * tables when running in shadow mode: -@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page, - return -EINTR; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ ASSERT(!rc); -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ - return rc; - } - -@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - page->nr_validated_ptes = 0; - page->partial_pte = 0; - } -+ page->linear_pt_count = 0; - rc = alloc_page_type(page, type, preemptible); - } - -@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - - void put_page_type(struct page_info *page) - { -- int rc = __put_page_type(page, 0); -+ int rc = _put_page_type(page, false, NULL); - ASSERT(rc == 0); - (void)rc; - } -@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type) - - int put_page_type_preemptible(struct page_info *page) - { -- return __put_page_type(page, 1); -+ return _put_page_type(page, true, NULL); - } - - int get_page_type_preemptible(struct page_info *page, unsigned long type) -@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v) - if ( !v->arch.old_guest_table ) - return 0; - -- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) -+ switch ( rc = _put_page_type(v->arch.old_guest_table, true, -+ v->arch.old_guest_ptpg) ) - { - case -EINTR: - case -ERESTART: - return -ERESTART; -+ case 0: -+ put_page(v->arch.old_guest_table); - } - - v->arch.old_guest_table = NULL; -@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn) - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -@@ -3310,7 +3442,10 @@ long do_mmuext_op( - if ( type == PGT_l1_page_table ) - put_page_and_type(page); - else -+ { -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ } - } - } - -@@ -3346,6 +3481,7 @@ long do_mmuext_op( - { - case -EINTR: - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - rc = 0; - break; -@@ -3425,6 +3561,7 @@ long do_mmuext_op( - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 924caac834..5a512918cc 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -527,6 +527,8 @@ struct arch_vcpu - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ - pagetable_t guest_table; /* (MFN) guest notion of cr3 */ - struct page_info *old_guest_table; /* partially destructed pagetable */ -+ struct page_info *old_guest_ptpg; /* containing page table of the */ -+ /* former, if any */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 119d7dec6b..445da50d47 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -124,11 +124,11 @@ struct page_info - u32 tlbflush_timestamp; - - /* -- * When PGT_partial is true then this field is valid and indicates -- * that PTEs in the range [0, @nr_validated_ptes) have been validated. 
-- * An extra page reference must be acquired (or not dropped) whenever -- * PGT_partial gets set, and it must be dropped when the flag gets -- * cleared. This is so that a get() leaving a page in partially -+ * When PGT_partial is true then the first two fields are valid and -+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been -+ * validated. An extra page reference must be acquired (or not dropped) -+ * whenever PGT_partial gets set, and it must be dropped when the flag -+ * gets cleared. This is so that a get() leaving a page in partially - * validated state (where the caller would drop the reference acquired - * due to the getting of the type [apparently] failing [-ERESTART]) - * would not accidentally result in a page left with zero general -@@ -152,10 +152,18 @@ struct page_info - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. -+ * -+ * The 3rd field, @linear_pt_count, indicates -+ * - by a positive value, how many same-level page table entries a page -+ * table has, -+ * - by a negative value, in how many same-level page tables a page is -+ * in use. - */ - struct { -- u16 nr_validated_ptes; -- s8 partial_pte; -+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1; -+ u16 :16 - PAGETABLE_ORDER - 1 - 2; -+ s16 partial_pte:2; -+ s16 linear_pt_count; - }; - - /* -@@ -206,6 +214,9 @@ struct page_info - #define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL< -Date: Fri, 22 Sep 2017 11:46:55 +0100 -Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default - -Allowing pagetables to point to other pagetables of the same level -(often called 'linear pagetables') has been included in Xen since its -inception. But it is not used by the most common PV guests (Linux, -NetBSD, minios), and has been the source of a number of subtle -reference-counting bugs. - -Add a command-line option to control whether PV linear pagetables are -allowed (disabled by default). - -Signed-off-by: George Dunlap -Reviewed-by: Andrew Cooper ---- -Changes since v2: -- s/_/-/; in command-line option -- Added __read_mostly ---- - docs/misc/xen-command-line.markdown | 15 +++++++++++++++ - xen/arch/x86/mm.c | 10 ++++++++++ - 2 files changed, 25 insertions(+) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 44d99852aa..45ef873abb 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1374,6 +1374,21 @@ The following resources are available: - CDP, one COS will corespond two CBMs other than one with CAT, due to the - sum of CBMs is fixed, that means actual `cos_max` in use will automatically - reduce to half when CDP is enabled. -+ -+### pv-linear-pt -+> `= ` -+ -+> Default: `false` -+ -+Allow PV guests to have pagetable entries pointing to other pagetables -+of the same level (i.e., allowing L2 PTEs to point to other L2 pages). -+This technique is often called "linear pagetables", and is sometimes -+used to allow operating systems a simple way to consistently map the -+current process's pagetables into its own virtual address space. -+ -+None of the most common PV operating systems (Linux, NetBSD, MiniOS) -+use this technique, but there may be custom operating systems which -+do. 
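Operators who do run such a custom OS would turn the feature on from the hypervisor command line. A hypothetical GRUB2 entry (the option name comes from this patch; the paths and other arguments are placeholders):

    multiboot /boot/xen.gz pv-linear-pt=true dom0_mem=1024M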
- - ### reboot - > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 1e469bd354..32952a46b9 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -814,6 +814,9 @@ static void dec_linear_uses(struct page_info *pg) - * frame if it is mapped by a different root table. This is sufficient and - * also necessary to allow validation of a root table mapping itself. - */ -+static bool __read_mostly pv_linear_pt_enable = false; -+boolean_param("pv-linear-pt", pv_linear_pt_enable); -+ - #define define_get_linear_pagetable(level) \ - static int \ - get_##level##_linear_pagetable( \ -@@ -823,6 +826,13 @@ get_##level##_linear_pagetable( \ - struct page_info *page; \ - unsigned long pfn; \ - \ -+ if ( !pv_linear_pt_enable ) \ -+ { \ -+ gdprintk(XENLOG_WARNING, \ -+ "Attempt to create linear p.t. (feature disabled)\n"); \ -+ return 0; \ -+ } \ -+ \ - if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ - { \ - gdprintk(XENLOG_WARNING, \ --- -2.14.1 - diff -Nru xen-4.9.0/debian/patches/xsa241-4.9.patch xen-4.9.2/debian/patches/xsa241-4.9.patch --- xen-4.9.0/debian/patches/xsa241-4.9.patch 2017-10-10 09:24:52.000000000 +0000 +++ xen-4.9.2/debian/patches/xsa241-4.9.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,131 +0,0 @@ -From: Jan Beulich -Subject: x86: don't store possibly stale TLB flush time stamp - -While the timing window is extremely narrow, it is theoretically -possible for an update to the TLB flush clock and a subsequent flush -IPI to happen between the read and write parts of the update of the -per-page stamp. Exclude this possibility by disabling interrupts -across the update, preventing the IPI to be serviced in the middle. - -This is XSA-241. - -Suggested-by: George Dunlap -Signed-off-by: Jan Beulich -Reviewed-by: George Dunlap - -Index: xen-4.9.0/xen/arch/x86/mm.c -=================================================================== ---- xen-4.9.0.orig/xen/arch/x86/mm.c -+++ xen-4.9.0/xen/arch/x86/mm.c -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info--; - } -@@ -2544,7 +2544,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2598,7 +2598,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. 
Updating of linear_pt_count luckily is not -@@ -2619,7 +2619,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) -Index: xen-4.9.0/xen/arch/x86/mm/shadow/common.c -=================================================================== ---- xen-4.9.0.orig/xen/arch/x86/mm/shadow/common.c -+++ xen-4.9.0/xen/arch/x86/mm/shadow/common.c -@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t - * TLBs when we reuse the page. Because the destructors leave the - * contents of the pages in place, we can delay TLB flushes until - * just before the allocator hands the page out again. */ -- sp->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(sp); - perfc_decr(shadow_alloc_count); - page_list_add_tail(sp, &d->arch.paging.shadow.freelist); - sp = next; -Index: xen-4.9.0/xen/common/page_alloc.c -=================================================================== ---- xen-4.9.0.orig/xen/common/page_alloc.c -+++ xen-4.9.0/xen/common/page_alloc.c -@@ -960,7 +960,7 @@ static void free_heap_pages( - /* If a page has no owner it will need no safety TLB flush. */ - pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); - if ( pg[i].u.free.need_tlbflush ) -- pg[i].tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(&pg[i]); - - /* This page is not a guest frame any more. */ - page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */ -Index: xen-4.9.0/xen/include/asm-arm/flushtlb.h -=================================================================== ---- xen-4.9.0.orig/xen/include/asm-arm/flushtlb.h -+++ xen-4.9.0/xen/include/asm-arm/flushtlb.h -@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma - - #define tlbflush_current_time() (0) - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ page->tlbflush_timestamp = tlbflush_current_time(); -+} -+ - #if defined(CONFIG_ARM_32) - # include - #elif defined(CONFIG_ARM_64) -Index: xen-4.9.0/xen/include/asm-x86/flushtlb.h -=================================================================== ---- xen-4.9.0.orig/xen/include/asm-x86/flushtlb.h -+++ xen-4.9.0/xen/include/asm-x86/flushtlb.h -@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time); - - #define tlbflush_current_time() tlbflush_clock - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ /* -+ * Prevent storing a stale time stamp, which could happen if an update -+ * to tlbflush_clock plus a subsequent flush IPI happen between the -+ * reading of tlbflush_clock and the writing of the struct page_info -+ * field. -+ */ -+ ASSERT(local_irq_is_enabled()); -+ local_irq_disable(); -+ page->tlbflush_timestamp = tlbflush_current_time(); -+ local_irq_enable(); -+} -+ - /* - * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. 
- * @lastuse_stamp is a timestamp taken when the PFN we are testing was last -Index: xen-4.9.0/xen/arch/arm/smp.c -=================================================================== ---- xen-4.9.0.orig/xen/arch/arm/smp.c -+++ xen-4.9.0/xen/arch/arm/smp.c -@@ -1,3 +1,4 @@ -+#include - #include - #include - #include diff -Nru xen-4.9.0/debian/patches/xsa242-4.9.patch xen-4.9.2/debian/patches/xsa242-4.9.patch --- xen-4.9.0/debian/patches/xsa242-4.9.patch 2017-10-10 09:24:52.000000000 +0000 +++ xen-4.9.2/debian/patches/xsa242-4.9.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -From: Jan Beulich -Subject: x86: don't allow page_unlock() to drop the last type reference - -Only _put_page_type() does the necessary cleanup, and hence not all -domain pages can be released during guest cleanup (leaving around -zombie domains) if we get this wrong. - -This is XSA-242. - -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page) - - do { - x = y; -+ ASSERT((x & PGT_count_mask) && (x & PGT_locked)); -+ - nx = x - (1 | PGT_locked); -+ /* We must not drop the last reference here. */ -+ ASSERT(nx & PGT_count_mask); - } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); - } - -@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in - (page->count_info & PGC_page_table)) ) - page_set_tlbflush_timestamp(page); - } -+ else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == -+ (PGT_locked | 1)) ) -+ { -+ /* -+ * We must not drop the second to last reference when the page is -+ * locked, as page_unlock() doesn't do any cleanup of the type. -+ */ -+ cpu_relax(); -+ y = page->u.inuse.type_info; -+ continue; -+ } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - break; diff -Nru xen-4.9.0/debian/patches/xsa243.patch xen-4.9.2/debian/patches/xsa243.patch --- xen-4.9.0/debian/patches/xsa243.patch 2017-10-10 09:24:52.000000000 +0000 +++ xen-4.9.2/debian/patches/xsa243.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,93 +0,0 @@ -From: Andrew Cooper -Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests - -When initially creating a monitor table for 4-level translated guests, don't -install a shadow-linear mapping. This mapping is actually self-linear, and -trips up the writeable heuristic logic into following Xen's mappings, not the -guests' shadows it was expecting to follow. - -A consequence of this is that sh_guess_wrmap() needs to cope with there being -no shadow-linear mapping present, which in practice occurs once each time a -vcpu switches to 4-level paging from a different paging mode. - -An appropriate shadow-linear slot will be inserted into the monitor table -either while constructing lower level monitor tables, or by sh_update_cr3(). - -While fixing this, clarify the safety of the other mappings. Despite -appearing unsafe, it is correct to create a guest-linear mapping for -translated domains; this is self-linear and doesn't point into the translated -domain. Drop a dead clause for translate != external guests. - -This is XSA-243. 
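"Self-linear" is worth unpacking here: a top-level slot that points back at the table containing it makes the whole paging hierarchy reachable through ordinary virtual addresses. A minimal sketch of the idea (hypothetical helper names, not Xen's actual code):

    /* Recursive (self-linear) slot: entry N of the top-level table
     * points at that table's own frame, so any PTE in the hierarchy
     * can be read through a virtual address constructed via slot N. */
    l4_table[SELF_SLOT] = make_l4_entry(l4_table_mfn, _PAGE_PRESENT | _PAGE_RW);

The patch below distinguishes this self-linear case (gl4mfn == sl4mfn, used for monitor tables) from the shadow-linear case, and teaches sh_guess_wrmap() to cope when no shadow-linear mapping has been installed yet.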
-
-Signed-off-by: Andrew Cooper
-Acked-by: Tim Deegan
-
-diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
-index 8d4f244..a18d286 100644
---- a/xen/arch/x86/mm/shadow/multi.c
-+++ b/xen/arch/x86/mm/shadow/multi.c
-@@ -1485,26 +1485,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn)
- sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
- }
-
-- /* Shadow linear mapping for 4-level shadows. N.B. for 3-level
-- * shadows on 64-bit xen, this linear mapping is later replaced by the
-- * monitor pagetable structure, which is built in make_monitor_table
-- * and maintained by sh_update_linear_entries. */
-- sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
-- shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
--
-- /* Self linear mapping. */
-- if ( shadow_mode_translate(d) && !shadow_mode_external(d) )
-+ /*
-+ * Linear mapping slots:
-+ *
-+ * Calling this function with gl4mfn == sl4mfn is used to construct a
-+ * monitor table for translated domains. In this case, gl4mfn forms the
-+ * self-linear mapping (i.e. not pointing into the translated domain), and
-+ * the shadow-linear slot is skipped. The shadow-linear slot is either
-+ * filled when constructing lower level monitor tables, or via
-+ * sh_update_cr3() for 4-level guests.
-+ *
-+ * Calling this function with gl4mfn != sl4mfn is used for non-translated
-+ * guests, where the shadow-linear slot is actually self-linear, and the
-+ * guest-linear slot points into the guest's view of its pagetables.
-+ */
-+ if ( shadow_mode_translate(d) )
- {
-- // linear tables may not be used with translated PV guests
-- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-+ ASSERT(mfn_eq(gl4mfn, sl4mfn));
-+
-+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
- shadow_l4e_empty();
- }
- else
- {
-- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW);
-+ ASSERT(!mfn_eq(gl4mfn, sl4mfn));
-+
-+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
-+ shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
- }
-
-+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-+ shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW);
-+
- unmap_domain_page(sl4e);
- }
- #endif
-@@ -4405,6 +4417,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
-
- /* Carefully look in the shadow linear map for the l1e we expect */
- #if SHADOW_PAGING_LEVELS >= 4
-+ /* Is a shadow linear map installed in the first place? */
-+ sl4p = v->arch.paging.shadow.guest_vtable;
-+ sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
-+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
-+ return 0;
- sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
- if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
- return 0;
diff -Nru xen-4.9.0/debian/patches/xsa244.patch xen-4.9.2/debian/patches/xsa244.patch
--- xen-4.9.0/debian/patches/xsa244.patch 2017-10-10 09:24:52.000000000 +0000
+++ xen-4.9.2/debian/patches/xsa244.patch 1970-01-01 00:00:00.000000000 +0000
@@ -1,59 +0,0 @@
-From: Andrew Cooper
-Subject: [PATCH] x86/cpu: Fix IST handling during PCPU bringup
-
-Clear IST references in newly allocated IDTs. Nothing good will come of
-having them set before the TSS is suitably constructed (although the chances
-of the CPU surviving such an IST interrupt/exception are extremely slim).
-
-Uniformly set the IST references after the TSS is in place.
This fixes an -issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context -will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable -to privilege escalation from PV guests until it subsequently schedules an HVM -guest. - -This is XSA-244 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/cpu/common.c | 5 +++++ - xen/arch/x86/smpboot.c | 3 +++ - 2 files changed, 8 insertions(+) - -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 78f5667..6cf3628 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -640,6 +640,7 @@ void __init early_cpu_init(void) - * - Sets up TSS with stack pointers, including ISTs - * - Inserts TSS selector into regular and compat GDTs - * - Loads GDT, IDT, TR then null LDT -+ * - Sets up IST references in the IDT - */ - void load_system_tables(void) - { -@@ -702,6 +703,10 @@ void load_system_tables(void) - asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) ); - asm volatile ("lldt %w0" : : "rm" (0) ); - -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ - /* - * Bottom-of-stack must be 16-byte aligned! - * -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 3ca716c..1609b62 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -724,6 +724,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - if ( idt_tables[cpu] == NULL ) - goto oom; - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) diff -Nru xen-4.9.0/debian/patches/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch xen-4.9.2/debian/patches/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch --- xen-4.9.0/debian/patches/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch 2017-10-10 09:24:52.000000000 +0000 +++ xen-4.9.2/debian/patches/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -From a48d47febc1340f27d6c716545692641a09b414c Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Thu, 21 Sep 2017 14:13:08 +0100 -Subject: [PATCH 1/2] xen/page_alloc: Cover memory unreserved after boot in - first_valid_mfn - -On Arm, some regions (e.g Initramfs, Dom0 Kernel...) are marked as -reserved until the hardware domain is built and they are copied into its -memory. Therefore, they will not be added in the boot allocator via -init_boot_pages. - -Instead, init_xenheap_pages will be called once the region are not used -anymore. - -Update first_valid_mfn in both init_heap_pages and init_boot_pages -(already exist) to cover all the cases. 
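Stripped to its essentials, the change is a locked read-modify-write of a global minimum; the hunk below amounts to the following pattern (sketch):

    /* Regions released after boot can race with one another, so the
     * minimum must only move while holding the heap lock; otherwise a
     * concurrent update could be lost. */
    spin_lock(&heap_lock);
    first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn);
    spin_unlock(&heap_lock);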
- -Signed-off-by: Julien Grall -[Adjust comment, added locking around first_valid_mfn update] -Signed-off-by: Boris Ostrovsky ---- - xen/common/page_alloc.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index 0b9f6cc6df..fbe5a8af39 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -1700,6 +1700,16 @@ static void init_heap_pages( - { - unsigned long i; - -+ /* -+ * Some pages may not go through the boot allocator (e.g reserved -+ * memory at boot but released just after --- kernel, initramfs, -+ * etc.). -+ * Update first_valid_mfn to ensure those regions are covered. -+ */ -+ spin_lock(&heap_lock); -+ first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn); -+ spin_unlock(&heap_lock); -+ - for ( i = 0; i < nr_pages; i++ ) - { - unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); --- -2.11.0 - diff -Nru xen-4.9.0/debian/patches/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch xen-4.9.2/debian/patches/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch --- xen-4.9.0/debian/patches/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch 2017-10-10 09:24:52.000000000 +0000 +++ xen-4.9.2/debian/patches/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -From cbfcf039d0e0b6f4c4cb3de612f7bf788a0c47cd Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Mon, 18 Sep 2017 14:24:08 +0100 -Subject: [PATCH 2/2] xen/arm: Correctly report the memory region in the dummy - NUMA helpers - -NUMA is currently not supported on Arm. Because common code is -NUMA-aware, dummy helpers are instead provided to expose a single node. - -Those helpers are for instance used to know the region to scrub. - -However the memory region is not reported correctly. Indeed, the -frametable may not be at the beginning of the memory and there might be -multiple memory banks. This will lead to not scrub some part of the -memory. - -The memory information can be found using: - * first_valid_mfn as the start of the memory - * max_page - first_valid_mfn as the spanned pages - -Note that first_valid_mfn is now been exported. The prototype has been -added in asm-arm/numa.h and not in a common header because I would -expect the variable to become static once NUMA is fully supported on -Arm. - -Signed-off-by: Julien Grall ---- - xen/common/page_alloc.c | 6 +++++- - xen/include/asm-arm/numa.h | 10 ++++++++-- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index fbe5a8af39..472c6fe329 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -192,7 +192,11 @@ PAGE_LIST_HEAD(page_broken_list); - * BOOT-TIME ALLOCATOR - */ - --static unsigned long __initdata first_valid_mfn = ~0UL; -+/* -+ * first_valid_mfn is exported because it is use in ARM specific NUMA -+ * helpers. See comment in asm-arm/numa.h. 
-+ */ -+unsigned long first_valid_mfn = ~0UL; - - static struct bootmem_region { - unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ -diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h -index a2c1a3476d..3e7384da9e 100644 ---- a/xen/include/asm-arm/numa.h -+++ b/xen/include/asm-arm/numa.h -@@ -12,9 +12,15 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) - return 0; - } - -+/* -+ * TODO: make first_valid_mfn static when NUMA is supported on Arm, this -+ * is required because the dummy helpers is using it. -+ */ -+extern unsigned long first_valid_mfn; -+ - /* XXX: implement NUMA support */ --#define node_spanned_pages(nid) (total_pages) --#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) -+#define node_spanned_pages(nid) (max_page - first_valid_mfn) -+#define node_start_pfn(nid) (first_valid_mfn) - #define __node_distance(a, b) (20) - - static inline unsigned int arch_get_dma_bitsize(void) --- -2.11.0 - diff -Nru xen-4.9.0/docs/man/xen-tscmode.pod.7 xen-4.9.2/docs/man/xen-tscmode.pod.7 --- xen-4.9.0/docs/man/xen-tscmode.pod.7 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/docs/man/xen-tscmode.pod.7 2018-03-28 13:10:55.000000000 +0000 @@ -96,18 +96,19 @@ =item * whether or not the VM has been saved/restored/migrated -To understand this in more detail, the rest of this document must -be read. =back +To understand this in more detail, the rest of this document must +be read. + =head1 DETERMINING RDTSC FREQUENCY To determine the frequency of rdtsc instructions that are emulated, -an "xm" command can be used by a privileged user of domain0. The +an "xl" command can be used by a privileged user of domain0. The command: - # xm debug-key s; xm dmesg | tail + # xl debug-key s; xl dmesg | tail provides information about TSC usage in each domain where TSC emulation is currently enabled. diff -Nru xen-4.9.0/docs/misc/arm/passthrough.txt xen-4.9.2/docs/misc/arm/passthrough.txt --- xen-4.9.0/docs/misc/arm/passthrough.txt 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/docs/misc/arm/passthrough.txt 2018-03-28 13:10:55.000000000 +0000 @@ -12,7 +12,11 @@ 2) Create a partial device tree describing the device. The IRQ are mapped 1:1 to the guest (i.e VIRQ == IRQ). For MMIO, you will have to find a hole in the guest memory layout (see xen/include/public/arch-arm.h, note that -the layout is not stable and can change between versions of Xen). +the layout is not stable and can change between versions of Xen). Please +be aware that passing a partial device tree to a VM is a powerful tool, +use it with care. In production, only allow assignment of devices which +have been previously tested and known to work correctly when given to +guests. /dts-v1/; @@ -48,6 +52,8 @@ - #size-cells * See http://www.devicetree.org/Device_Tree_Usage for more information about device tree. + * In this example, the device MMIO region is placed at a different + address (0x10000000) compared to the host address (0xfff51000) 3) Compile the partial guest device with dtc (Device Tree Compiler). For our purpose, the compiled file will be called guest-midway.dtb and @@ -60,3 +66,20 @@ irqs = [ 112, 113, 114 ] iomem = [ "0xfff51,1@0x10000" ] +Please refer to your platform docs for the MMIO ranges and interrupts. + +They can also be calculated from the original device tree (not +recommended). You can read about the "interrupts" property format in the +device tree bindings of the interrupt controller of your platform. 
For +example, in the case of GICv2 see [arm,gic.txt]; in the case of GICv3 +see [arm,gic-v3.txt] in the Linux repository. For both GICv2 and GICv3 +the "interrupts" property format is the same: the first cell is the +interrupt type, and the second cell is the interrupt number. Given that +SPI numbers start from 32, in this example 80 + 32 = 112. + +See man [xl.cfg] for the iomem format. The reg property is just a pair +of address, then size numbers, each of them can occupy 1 or 2 cells. + +[arm,gic.txt]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt +[arm,gic-v3.txt]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt +[xl.cfg]: https://xenbits.xen.org/docs/unstable/man/xl.cfg.5.html diff -Nru xen-4.9.0/docs/misc/grant-tables.txt xen-4.9.2/docs/misc/grant-tables.txt --- xen-4.9.0/docs/misc/grant-tables.txt 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/docs/misc/grant-tables.txt 2018-03-28 13:10:55.000000000 +0000 @@ -87,7 +87,8 @@ inconsistent grant table state such as current version, partially initialized active table pages, etc. - grant_table->maptrack_lock : spinlock used to protect the maptrack free list + grant_table->maptrack_lock : spinlock used to protect the maptrack limit + v->maptrack_freelist_lock : spinlock used to protect the maptrack free list active_grant_entry->lock : spinlock used to serialize modifications to active entries @@ -102,6 +103,10 @@ The maptrack free list is protected by its own spinlock. The maptrack lock may be locked while holding the grant table lock. + The maptrack_freelist_lock is an innermost lock. It may be locked + while holding other locks, but no other locks may be acquired within + it. + Active entries are obtained by calling active_entry_acquire(gt, ref). This function returns a pointer to the active entry after locking its spinlock. The caller must hold the grant table read lock before diff -Nru xen-4.9.0/docs/misc/xen-command-line.markdown xen-4.9.2/docs/misc/xen-command-line.markdown --- xen-4.9.0/docs/misc/xen-command-line.markdown 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/docs/misc/xen-command-line.markdown 2018-03-28 13:10:55.000000000 +0000 @@ -252,6 +252,33 @@ enough. Setting this to a high value may cause boot failure, particularly if the NMI watchdog is also enabled. +### bti (x86) +> `= List of [ thunk=retpoline|lfence|jmp, ibrs=, ibpb=, rsb_{vmexit,native}= ]` + +Branch Target Injection controls. By default, Xen will pick the most +appropriate BTI mitigations based on compiled in support, loaded microcode, +and hardware details. + +**WARNING: Any use of this option may interfere with heuristics. Use with +extreme care.** + +If Xen was compiled with INDIRECT_THUNK support, `thunk=` can be used to +select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` +locations. The default thunk is `retpoline` (generally preferred for Intel +hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal +overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). + +On hardware supporting IBRS, the `ibrs=` option can be used to force or +prevent Xen using the feature itself. If Xen is not using IBRS itself, +functionality is still set up so IBRS can be virtualised for guests. 
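As a concrete illustration (not a recommendation; per the warning above, any override interferes with the heuristics), forcing the lfence thunk while keeping IBRS enabled would look like:

    bti=thunk=lfence,ibrs=1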
+ +On hardware supporting IBPB, the `ibpb=` option can be used to prevent Xen +from issuing Branch Prediction Barriers on vcpu context switches. + +The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the +RSB gets overwritten. There are individual controls for an entry from HVM +context, and an entry from a native (PV or Xen) context. + ### xenheap\_megabytes (arm32) > `= ` @@ -418,6 +445,18 @@ respectively. * `verbose` option can be included as a string or also as `verbose=` +### cpuid (x86) +> `= List of comma separated booleans` + +This option allows for fine tuning of the facilities Xen will use, after +accounting for hardware capabilities as enumerated via CPUID. + +Currently accepted: + +The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by +default if avaiable. They can be ignored, e.g. `no-ibrsb`, at which point Xen +won't use them itself, and won't offer them to guests. + ### cpuid\_mask\_cpu (AMD only) > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b` @@ -1374,6 +1413,25 @@ CDP, one COS will corespond two CBMs other than one with CAT, due to the sum of CBMs is fixed, that means actual `cos_max` in use will automatically reduce to half when CDP is enabled. + +### pv-linear-pt +> `= ` + +> Default: `true` + +Only available if Xen is compiled with CONFIG\_PV\_LINEAR\_PT support +enabled. + +Allow PV guests to have pagetable entries pointing to other pagetables +of the same level (i.e., allowing L2 PTEs to point to other L2 pages). +This technique is often called "linear pagetables", and is sometimes +used to allow operating systems a simple way to consistently map the +current process's pagetables into its own virtual address space. + +Linux and MiniOS don't use this technique. NetBSD and Novell Netware +do; there may be other custom operating systems which do. If you're +certain you don't plan on having PV guests which use this feature, +turning it off can reduce the attack surface. ### reboot > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` @@ -1772,6 +1830,15 @@ clustered mode. The default, given no hint from the **FADT**, is cluster mode. +### xpti +> `= ` + +> Default: `false` on AMD hardware +> Default: `true` everywhere else + +Override default selection of whether to isolate 64-bit PV guest page +tables. + ### xsave > `= ` diff -Nru xen-4.9.0/extras/mini-os/app.lds xen-4.9.2/extras/mini-os/app.lds --- xen-4.9.0/extras/mini-os/app.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/app.lds 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,11 @@ +SECTIONS +{ + .app.bss : { + __app_bss_start = . ; + *(.bss .bss.*) + *(COMMON) + *(.lbss .lbss.*) + *(LARGE_COMMON) + __app_bss_end = . ; + } +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/arm32.S xen-4.9.2/extras/mini-os/arch/arm/arm32.S --- xen-4.9.0/extras/mini-os/arch/arm/arm32.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/arm32.S 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,294 @@ +@ Offset of the kernel within the RAM. This is a Linux/zImage convention which we +@ rely on for now. 
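+@ (0x8000 is 32 KiB: Linux zImage loaders place the kernel that far
+@ above the start of RAM, leaving room below for boot parameters and
+@ early page tables.)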
+#define ZIMAGE_KERNEL_OFFSET 0x8000
+
+.section .text
+
+.globl _start
+_start:
+ @ zImage header
+.rept 8
+ mov r0, r0
+.endr
+ b reset
+ .word 0x016f2818 @ Magic numbers to help the loader
+ .word 0 @ zImage start address (0 = relocatable)
+ .word _edata - _start @ zImage end address (excludes bss section)
+ @ end of zImage header
+
+@ Called at boot time. Sets up MMU, exception vectors and stack, and then calls C arch_init() function.
+@ => r2 -> DTB
+@ <= never returns
+@ Note: this boot code needs to be within the first (1MB - ZIMAGE_KERNEL_OFFSET) of _start.
+reset:
+ @ Problem: the C code wants to be at a known address (_start), but Xen might
+ @ load us anywhere. We initialise the MMU (mapping virtual to physical
+ @ addresses) so everything ends up where the code expects it to be.
+ @
+ @ We calculate the offset between where the linker thought _start would be and
+ @ where it actually is, and initialise the page tables to have that offset for
+ @ every page.
+ @
+ @ When we turn on the MMU, we're still executing at the old address. We don't want
+ @ the code to disappear from under us. So we have to do the mapping in stages:
+ @
+ @ 1. set up a mapping to our current page from both its current and desired addresses
+ @ 2. enable the MMU
+ @ 3. jump to the new address
+ @ 4. remap all the other pages with the calculated offset
+
+ adr r1, _start @ r1 = physical address of _start
+ ldr r3, =_start @ r3 = (desired) virtual address of _start
+ sub r9, r1, r3 @ r9 = (physical - virtual) offset
+
+ ldr r7, =_page_dir @ r7 = (desired) virtual addr of translation table
+ add r1, r7, r9 @ r1 = physical addr of translation table
+
+ @ Tell the system where our page table is located.
+ @ This is the 16 KB top-level translation table, in which
+ @ each word maps one 1MB virtual section to a physical section.
+ @ Note: We leave TTBCR as 0, meaning that only TTBR0 is used and
+ @ we use the short-descriptor format (32-bit physical addresses).
+ orr r0, r1, #0b0001011 @ Sharable, Inner/Outer Write-Back Write-Allocate Cacheable
+ mcr p15, 0, r0, c2, c0, 0 @ set TTBR0
+
+ @ Set access permission for domains.
+ @ Domains are deprecated, but we have to configure them anyway.
+ @ We mark every page as being domain 0 and set domain 0 to "client mode"
+ @ (client mode = use access flags in page table).
+ mov r0, #1 @ 1 = client
+ mcr p15, 0, r0, c3, c0, 0 @ DACR
+
+ @ Template (flags) for a 1 MB page-table entry.
+ @ TEX[2:0] C B = 001 1 1 (outer and inner write-back, write-allocate)
+ ldr r8, =(0x2 + /* Section entry */ \
+ 0xc + /* C B */ \
+ (3 << 10) + /* Read/write */ \
+ (1 << 12) + /* TEX */ \
+ (1 << 16) + /* Sharable */ \
+ (1<<19)) /* Non-secure */
+ @ r8 = template page table entry
+
+ @ Add an entry for the current physical section, at the old and new
+ @ addresses. It's OK if they're the same.
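+ @ (Each L1 entry maps a 1 MB section: the entry index is VA[31:20]
+ @ and entries are 4 bytes wide, so the byte offset into the table is
+ @ (VA >> 20) * 4 == VA >> 18, hence the "lsr#18" scaled addressing
+ @ below. Shifting pc right then left by 20 bits rounds it down to the
+ @ start of its 1 MB section.)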
+ mov r0, pc, lsr#20 + mov r0, r0, lsl#20 @ r0 = physical address of this code's section start + orr r3, r0, r8 @ r3 = table entry for this section + ldr r4, =_start @ r4 = desired virtual address of this section + str r3, [r1, r4, lsr#18] @ map desired virtual section to this code + str r3, [r1, r0, lsr#18] @ map current section to this code too + + @ Invalidate TLB + dsb @ Caching is off, but must still prevent reordering + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + + @ Enable MMU / SCTLR + mrc p15, 0, r1, c1, c0, 0 @ SCTLR + orr r1, r1, #3 << 11 @ enable icache, branch prediction + orr r1, r1, #4 + 1 @ enable dcache, MMU + mcr p15, 0, r1, c1, c0, 0 @ SCTLR + isb + + ldr r1, =stage2 @ Virtual address of stage2 + bx r1 + +@ Called once the MMU is enabled. The boot code and the page table are mapped, +@ but nothing else is yet. +@ +@ => r2 -> dtb (physical) +@ r7 = virtual address of page table +@ r8 = section entry template (flags) +@ r9 = desired physical - virtual offset +@ pc -> somewhere in newly-mapped virtual code section +stage2: + @ Invalidate TLB + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + isb + + @ The new mapping has now taken effect: + @ r7 -> page_dir + + @ Fill in the whole top-level translation table (at page_dir). + @ Populate the whole pagedir with 1MB section descriptors. + + mov r1, r7 @ r1 -> first section entry + add r3, r1, #4*4*1024 @ limit (4 GB address space, 4 byte entries) + orr r0, r8, r9 @ r0 = entry mapping section zero to start of physical RAM +1: + str r0, [r1],#4 @ write the section entry + add r0, r0, #1 << 20 @ next physical page (wraps) + cmp r1, r3 + bne 1b + + @ Invalidate TLB + dsb + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + isb + + @ Set VBAR -> exception_vector_table + @ SCTLR.V = 0 + adr r0, exception_vector_table + mcr p15, 0, r0, c12, c0, 0 + + @ Enable hardware floating point: + @ 1. Access to CP10 and CP11 must be enabled in the Coprocessor Access + @ Control Register (CP15.CACR): + mrc p15, 0, r1, c1, c0, 2 @ CACR + orr r1, r1, #(3 << 20) + (3 << 22) @ full access for CP10 & CP11 + mcr p15, 0, r1, c1, c0, 2 + @ 2. The EN bit in the FPEXC register must be set: + vmrs r0, FPEXC + orr r0, r0, #1<<30 @ EN (enable) + vmsr FPEXC, r0 + + @ Initialise 16 KB stack + ldr sp, =_boot_stack_end + + sub r0, r2, r9 @ r0 -> device tree (virtual address) + mov r1, r9 @ r1 = physical_address_offset + + b arch_init + +.pushsection .bss +@ Note: calling arch_init zeroes out this region. +.align 12 +.globl shared_info +shared_info: + .fill (1024), 4, 0x0 + +.align 3 +.globl irqstack +.globl irqstack_end +irqstack: + .fill (1024), 4, 0x0 +irqstack_end: + +fault_dump: + .fill 18, 4, 0x0 @ On fault, we save the registers + CPSR + handler address + +.popsection + +fault: + cpsid aif @ Disable interrupts + + ldr r13, =fault_dump + stmia r13, {r0-r12} @ Dump the non-banked registers directly (well, unless from FIQ mode) + str r14, [r13, #15 << 2] @ Our r14 is the faulting r15 + mov r0, r13 + + @ Save the caller's CPSR (our SPSR) too. + mrs r1, SPSR + str r1, [r13, #16 << 2] + + @ Switch to the mode we came from to get r13 and r14. + @ If coming from user mode, use System mode instead so we're still + @ privileged. 
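+ @ (CPSR/SPSR mode field M[4:0]: 0x10 is User mode, 0x1f is System
+ @ mode. System mode shares the User-mode r13/r14 banks while staying
+ @ privileged, which is exactly what is needed here.)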
+ and r1, r1, #0x1f @ r1 = SPSR mode + cmp r1, #0x10 @ If from User mode + moveq r1, #0x1f @ Then use System mode instead + + mrs r3, CPSR @ r3 = our CPSR + bic r2, r3, #0x1f + orr r2, r2, r1 + msr CPSR, r2 @ Change to mode r1 + + @ Save old mode's r13, r14 + str r13, [r0, #13 << 2] + str r14, [r0, #14 << 2] + + msr CPSR, r3 @ Back to fault mode + + ldr r1, [r0, #17 << 2] + sub r1, r1, #12 @ Fix to point at start of handler + str r1, [r0, #17 << 2] + + @ Call C code to format the register dump. + @ Clobbers the stack, but we're not going to return anyway. + ldr sp, =_boot_stack_end + bl dump_registers + b do_exit + +@ We want to store a unique value to identify this handler, without corrupting +@ any of the registers. So, we store r15 (which will point just after the branch). +@ Later, we subtract 12 so the user gets pointed at the start of the exception +@ handler. +#define FAULT(name) \ +.globl fault_##name; \ +fault_##name: \ + ldr r13, =fault_dump; \ + str r15, [r13, #17 << 2]; \ + b fault + +FAULT(reset) +FAULT(undefined_instruction) +FAULT(svc) +FAULT(prefetch_call) +FAULT(prefetch_abort) +FAULT(data_abort) + +@ exception base address +.align 5 +.globl exception_vector_table +@ Note: remember to call CLREX if returning from an exception: +@ "The architecture enables the local monitor to treat any exclusive store as +@ matching a previous LDREX address. For this reason, use of the CLREX +@ instruction to clear an existing tag is required on context switches." +@ -- ARM Cortex-A Series Programmer’s Guide (Version: 4.0) +exception_vector_table: + b fault_reset + b fault_undefined_instruction + b fault_svc + b fault_prefetch_call + b fault_prefetch_abort + b fault_data_abort + b irq_handler @ IRQ + .word 0xe7f000f0 @ abort on FIQ + +@ Call fault_undefined_instruction in "Undefined mode" +bug: + .word 0xe7f000f0 @ und/udf - a "Permanently Undefined" instruction + +irq_handler: + ldr sp, =irqstack_end + push {r0 - r12, r14} + + ldr r0, IRQ_handler + cmp r0, #0 + beq bug + blx r0 @ call handler + + @ Return from IRQ + pop {r0 - r12, r14} + clrex + subs pc, lr, #4 + +.globl IRQ_handler +IRQ_handler: + .long 0x0 + + +.globl __arch_switch_threads +@ => r0 = &prev->sp +@ r1 = &next->sp +@ <= returns to next thread's saved return address +__arch_switch_threads: + push {r4-r11} @ Store callee-saved registers to old thread's stack + stmia r0, {sp, lr} @ Store current sp and ip to prev's struct thread + + ldmia r1, {sp, lr} @ Load new sp, ip from next's struct thread + pop {r4-r11} @ Load callee-saved registers from new thread's stack + + bx lr + +@ This is called if you try to divide by zero. For now, we make a supervisor call, +@ which will make us halt. 
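+@ (raise() is referenced by the compiler runtime's integer-division
+@ helpers to report division by zero; without a symbol of this name
+@ the link would presumably fail.)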
+.globl raise +raise: + svc 0 + +.globl arm_start_thread +arm_start_thread: + pop {r0, r1} + @ r0 = user data + @ r1 -> thread's main function + ldr lr, =exit_thread + bx r1 diff -Nru xen-4.9.0/extras/mini-os/arch/arm/balloon.c xen-4.9.2/extras/mini-os/arch/arm/balloon.c --- xen-4.9.0/extras/mini-os/arch/arm/balloon.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/balloon.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,32 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#ifdef CONFIG_BALLOON + +void arch_pfn_add(unsigned long pfn, unsigned long mfn) +{ +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/arch/arm/events.c xen-4.9.2/extras/mini-os/arch/arm/events.c --- xen-4.9.0/extras/mini-os/arch/arm/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/events.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +static void virq_debug(evtchn_port_t port, struct pt_regs *regs, void *params) +{ + printk("Received a virq_debug event\n"); +} + +evtchn_port_t debug_port = -1; +void arch_init_events(void) +{ + debug_port = bind_virq(VIRQ_DEBUG, (evtchn_handler_t)virq_debug, 0); + if(debug_port == -1) + BUG(); + unmask_evtchn(debug_port); +} + +void arch_unbind_ports(void) +{ + if(debug_port != -1) + { + mask_evtchn(debug_port); + unbind_evtchn(debug_port); + } +} + +void arch_fini_events(void) +{ +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/gic.c xen-4.9.2/extras/mini-os/arch/arm/gic.c --- xen-4.9.0/extras/mini-os/arch/arm/gic.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/gic.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,238 @@ +// ARM GIC implementation + +#include +#include +#include +#include + +//#define VGIC_DEBUG +#ifdef VGIC_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=vgic.c, line=%d) " _f , __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
((void)0) +#endif + +extern void (*IRQ_handler)(void); + +struct gic { + volatile char *gicd_base; + volatile char *gicc_base; +}; + +static struct gic gic; + +// Distributor Interface +#define GICD_CTLR 0x0 +#define GICD_ISENABLER 0x100 +#define GICD_IPRIORITYR 0x400 +#define GICD_ITARGETSR 0x800 +#define GICD_ICFGR 0xC00 + +// CPU Interface +#define GICC_CTLR 0x0 +#define GICC_PMR 0x4 +#define GICC_IAR 0xc +#define GICC_EOIR 0x10 +#define GICC_HPPIR 0x18 + +#define gicd(gic, offset) ((gic)->gicd_base + (offset)) +#define gicc(gic, offset) ((gic)->gicc_base + (offset)) + +#define REG(addr) ((uint32_t *)(addr)) + +static inline uint32_t REG_READ32(volatile uint32_t *addr) +{ + uint32_t value; + __asm__ __volatile__("ldr %0, [%1]":"=&r"(value):"r"(addr)); + rmb(); + return value; +} + +static inline void REG_WRITE32(volatile uint32_t *addr, unsigned int value) +{ + __asm__ __volatile__("str %0, [%1]"::"r"(value), "r"(addr)); + wmb(); +} + +static void gic_set_priority(struct gic *gic, int irq_number, unsigned char priority) +{ + uint32_t value; + uint32_t *addr = REG(gicd(gic, GICD_IPRIORITYR)) + (irq_number >> 2); + value = REG_READ32(addr); + value &= ~(0xff << (8 * (irq_number & 0x3))); // clear old priority + value |= priority << (8 * (irq_number & 0x3)); // set new priority + REG_WRITE32(addr, value); +} + +static void gic_route_interrupt(struct gic *gic, int irq_number, unsigned char cpu_set) +{ + uint32_t value; + uint32_t *addr = REG(gicd(gic, GICD_ITARGETSR)) + (irq_number >> 2); + value = REG_READ32(addr); + value &= ~(0xff << (8 * (irq_number & 0x3))); // clear old target + value |= cpu_set << (8 * (irq_number & 0x3)); // set new target + REG_WRITE32(addr, value); +} + +/* When accessing the GIC registers, we can't use LDREX/STREX because it's not regular memory. 
*/ +static __inline__ void clear_bit_non_atomic(int nr, volatile void *base) +{ + volatile uint32_t *tmp = base; + tmp[nr >> 5] &= (unsigned long)~(1 << (nr & 0x1f)); +} + +static __inline__ void set_bit_non_atomic(int nr, volatile void *base) +{ + volatile uint32_t *tmp = base; + tmp[nr >> 5] |= (1 << (nr & 0x1f)); +} + +/* Note: not thread safe (but we only support one CPU for now anyway) */ +static void gic_enable_interrupt(struct gic *gic, int irq_number, + unsigned char cpu_set, unsigned char level_sensitive) +{ + int *set_enable_reg; + void *cfg_reg; + + // set priority + gic_set_priority(gic, irq_number, 0x0); + + // set target cpus for this interrupt + gic_route_interrupt(gic, irq_number, cpu_set); + + // set level/edge triggered + cfg_reg = (void *)gicd(gic, GICD_ICFGR); + if (level_sensitive) { + clear_bit_non_atomic((irq_number * 2) + 1, cfg_reg); + } else { + set_bit_non_atomic((irq_number * 2) + 1, cfg_reg); + } + + wmb(); + + // enable forwarding interrupt from distributor to cpu interface + set_enable_reg = (int *)gicd(gic, GICD_ISENABLER); + set_enable_reg[irq_number >> 5] = 1 << (irq_number & 0x1f); + wmb(); +} + +static void gic_enable_interrupts(struct gic *gic) +{ + // Global enable forwarding interrupts from distributor to cpu interface + REG_WRITE32(REG(gicd(gic, GICD_CTLR)), 0x00000001); + + // Global enable signalling of interrupt from the cpu interface + REG_WRITE32(REG(gicc(gic, GICC_CTLR)), 0x00000001); +} + +static void gic_disable_interrupts(struct gic *gic) +{ + // Global disable signalling of interrupt from the cpu interface + REG_WRITE32(REG(gicc(gic, GICC_CTLR)), 0x00000000); + + // Global disable forwarding interrupts from distributor to cpu interface + REG_WRITE32(REG(gicd(gic, GICD_CTLR)), 0x00000000); +} + +static void gic_cpu_set_priority(struct gic *gic, char priority) +{ + REG_WRITE32(REG(gicc(gic, GICC_PMR)), priority & 0x000000FF); +} + +static unsigned long gic_readiar(struct gic *gic) { + return REG_READ32(REG(gicc(gic, GICC_IAR))) & 0x000003FF; // Interrupt ID +} + +static void gic_eoir(struct gic *gic, uint32_t irq) { + REG_WRITE32(REG(gicc(gic, GICC_EOIR)), irq & 0x000003FF); +} + +//FIXME Get event_irq from dt +#define EVENTS_IRQ 31 +#define VIRTUALTIMER_IRQ 27 + +static void gic_handler(void) { + unsigned int irq = gic_readiar(&gic); + + DEBUG("IRQ received : %i\n", irq); + switch(irq) { + case EVENTS_IRQ: + do_hypervisor_callback(NULL); + break; + case VIRTUALTIMER_IRQ: + /* We need to get this event to wake us up from block_domain, + * but we don't need to do anything special with it. */ + break; + case 1022: + case 1023: + return; /* Spurious interrupt */ + default: + DEBUG("Unhandled irq\n"); + break; + } + + DEBUG("EIRQ\n"); + + gic_eoir(&gic, irq); +} + +void gic_init(void) { + gic.gicd_base = NULL; + int node = 0; + int depth = 0; + for (;;) + { + node = fdt_next_node(device_tree, node, &depth); + if (node <= 0 || depth < 0) + break; + + if (fdt_getprop(device_tree, node, "interrupt-controller", NULL)) { + int len = 0; + + if (fdt_node_check_compatible(device_tree, node, "arm,cortex-a15-gic") && + fdt_node_check_compatible(device_tree, node, "arm,cortex-a7-gic")) { + printk("Skipping incompatible interrupt-controller node\n"); + continue; + } + + const uint64_t *reg = fdt_getprop(device_tree, node, "reg", &len); + + /* We have two registers (GICC and GICD), each of which contains + * two parts (an address and a size), each of which is a 64-bit + * value (8 bytes), so we expect a length of 2 * 2 * 8 = 32. 
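+ * In other words, reg[] holds four big-endian 64-bit cells:
+ * reg[0]/reg[1] are the GICD base/size and reg[2]/reg[3] the GICC
+ * base/size, which is why the code below converts reg[0] and reg[2]
+ * with fdt64_to_cpu().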
+ * If any extra values are passed in future, we ignore them. */ + if (reg == NULL || len < 32) { + printk("Bad 'reg' property: %p %d\n", reg, len); + continue; + } + + gic.gicd_base = to_virt((long) fdt64_to_cpu(reg[0])); + gic.gicc_base = to_virt((long) fdt64_to_cpu(reg[2])); + printk("Found GIC: gicd_base = %p, gicc_base = %p\n", gic.gicd_base, gic.gicc_base); + break; + } + } + if (!gic.gicd_base) { + printk("GIC not found!\n"); + BUG(); + } + wmb(); + + /* Note: we could mark this as "device" memory here, but Xen will have already + * set it that way in the second stage translation table, so it's not necessary. + * See "Overlaying the memory type attribute" in the Architecture Reference Manual. + */ + + IRQ_handler = gic_handler; + + gic_disable_interrupts(&gic); + gic_cpu_set_priority(&gic, 0xff); + + /* Must call gic_enable_interrupts before enabling individual interrupts, otherwise our IRQ handler + * gets called endlessly with spurious interrupts. */ + gic_enable_interrupts(&gic); + + gic_enable_interrupt(&gic, EVENTS_IRQ /* interrupt number */, 0x1 /*cpu_set*/, 1 /*level_sensitive*/); + gic_enable_interrupt(&gic, VIRTUALTIMER_IRQ /* interrupt number */, 0x1 /*cpu_set*/, 1 /*level_sensitive*/); +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/hypercalls32.S xen-4.9.2/extras/mini-os/arch/arm/hypercalls32.S --- xen-4.9.0/extras/mini-os/arch/arm/hypercalls32.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/hypercalls32.S 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,64 @@ +/****************************************************************************** + * hypercall.S + * + * Xen hypercall wrappers + * + * Stefano Stabellini , Citrix, 2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#define __HVC(imm16) .long ((0xE1400070 | (((imm16) & 0xFFF0) << 4) | ((imm16) & 0x000F)) & 0xFFFFFFFF) + +#define XEN_IMM 0xEA1 + +#define HYPERCALL_SIMPLE(hypercall) \ +.globl HYPERVISOR_##hypercall; \ +.align 4,0x90; \ +HYPERVISOR_##hypercall: \ + mov r12, #__HYPERVISOR_##hypercall; \ + __HVC(XEN_IMM); \ + mov pc, lr; + +#define _hypercall0 HYPERCALL_SIMPLE +#define _hypercall1 HYPERCALL_SIMPLE +#define _hypercall2 HYPERCALL_SIMPLE +#define _hypercall3 HYPERCALL_SIMPLE +#define _hypercall4 HYPERCALL_SIMPLE + +_hypercall2(sched_op); +_hypercall2(memory_op); +_hypercall2(event_channel_op); +_hypercall2(xen_version); +_hypercall3(console_io); +_hypercall1(physdev_op); +_hypercall3(grant_table_op); +_hypercall3(vcpu_op); +_hypercall1(sysctl); +_hypercall1(domctl); +_hypercall2(hvm_op); +_hypercall1(xsm_op); diff -Nru xen-4.9.0/extras/mini-os/arch/arm/minios-arm32.lds xen-4.9.2/extras/mini-os/arch/arm/minios-arm32.lds --- xen-4.9.0/extras/mini-os/arch/arm/minios-arm32.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/minios-arm32.lds 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,83 @@ +OUTPUT_ARCH(arm) +ENTRY(_start) +SECTIONS +{ + /* Note: we currently assume that Xen will load the kernel image + * at start-of-RAM + 0x8000. We use this initial 32 KB for the stack + * and translation tables. + */ + _boot_stack = 0x400000; /* 16 KB boot stack */ + _boot_stack_end = 0x404000; + _page_dir = 0x404000; /* 16 KB translation table */ + . = 0x408000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + . = ALIGN(4096); + _erodata = .; + + /* newlib initialization functions */ + . = ALIGN(32 / 8); + PROVIDE (__preinit_array_start = .); + .preinit_array : { *(.preinit_array) } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { *(.init_array) } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { *(.fini_array) } + PROVIDE (__fini_array_end = .); + + .ctors : { + __CTOR_LIST__ = .; + *(.ctors) + CONSTRUCTORS + LONG(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + *(.dtors) + LONG(0) + __DTOR_END__ = .; + } + + .data : { /* Data */ + *(.data) + } + + /* Note: linker will insert any extra sections here, just before .bss */ + + .bss : { + _edata = .; /* End of data included in image */ + /* Nothing after here is included in the zImage's size */ + + __bss_start = .; + *(.bss) + *(.app.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
*/ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/mm.c xen-4.9.2/extras/mini-os/arch/arm/mm.c --- xen-4.9.0/extras/mini-os/arch/arm/mm.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/mm.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include +#include + +uint32_t physical_address_offset; +struct e820entry e820_map[1] = { + { + .addr = 0, + .size = ULONG_MAX - 1, + .type = E820_RAM + } +}; +unsigned e820_entries = 1; + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment) +{ + // FIXME + BUG(); +} + +void arch_init_mm(unsigned long *start_pfn_p, unsigned long *max_pfn_p) +{ + int memory; + int prop_len = 0; + const uint64_t *regs; + + printk(" _text: %p(VA)\n", &_text); + printk(" _etext: %p(VA)\n", &_etext); + printk(" _erodata: %p(VA)\n", &_erodata); + printk(" _edata: %p(VA)\n", &_edata); + printk(" stack start: %p(VA)\n", _boot_stack); + printk(" _end: %p(VA)\n", &_end); + + if (fdt_num_mem_rsv(device_tree) != 0) + printk("WARNING: reserved memory not supported!\n"); + + memory = fdt_node_offset_by_prop_value(device_tree, -1, "device_type", "memory", sizeof("memory")); + if (memory < 0) { + printk("No memory found in FDT!\n"); + BUG(); + } + + /* Xen will always provide us at least one bank of memory. + * Mini-OS will use the first bank for the time-being. */ + regs = fdt_getprop(device_tree, memory, "reg", &prop_len); + + /* The property must contain at least the start address + * and size, each of which is 8-bytes. */ + if (regs == NULL || prop_len < 16) { + printk("Bad 'reg' property: %p %d\n", regs, prop_len); + BUG(); + } + + unsigned int end = (unsigned int) &_end; + paddr_t mem_base = fdt64_to_cpu(regs[0]); + uint64_t mem_size = fdt64_to_cpu(regs[1]); + printk("Found memory at 0x%llx (len 0x%llx)\n", + (unsigned long long) mem_base, (unsigned long long) mem_size); + + BUG_ON(to_virt(mem_base) > (void *) &_text); /* Our image isn't in our RAM! */ + *start_pfn_p = PFN_UP(to_phys(end)); + uint64_t heap_len = mem_size - (PFN_PHYS(*start_pfn_p) - mem_base); + *max_pfn_p = *start_pfn_p + PFN_DOWN(heap_len); + + printk("Using pages %lu to %lu as free space for heap.\n", *start_pfn_p, *max_pfn_p); + + /* The device tree is probably in memory that we're about to hand over to the page + * allocator, so move it to the end and reserve that space. + */ + uint32_t fdt_size = fdt_totalsize(device_tree); + void *new_device_tree = to_virt(((*max_pfn_p << PAGE_SHIFT) - fdt_size) & PAGE_MASK); + if (new_device_tree != device_tree) { + memmove(new_device_tree, device_tree, fdt_size); + } + device_tree = new_device_tree; + *max_pfn_p = to_phys(new_device_tree) >> PAGE_SHIFT; +} + +void arch_init_demand_mapping_area(void) +{ +} + +int do_map_frames(unsigned long addr, + const unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, domid_t id, int *err, unsigned long prot) +{ + return -ENOSYS; +} + +/* Get Xen's suggested physical page assignments for the grant table. 
*/ +static paddr_t get_gnttab_base(void) +{ + int hypervisor; + int len = 0; + const uint64_t *regs; + paddr_t gnttab_base; + + hypervisor = fdt_node_offset_by_compatible(device_tree, -1, "xen,xen"); + BUG_ON(hypervisor < 0); + + regs = fdt_getprop(device_tree, hypervisor, "reg", &len); + /* The property contains the address and size, 8-bytes each. */ + if (regs == NULL || len < 16) { + printk("Bad 'reg' property: %p %d\n", regs, len); + BUG(); + } + + gnttab_base = fdt64_to_cpu(regs[0]); + + printk("FDT suggests grant table base %llx\n", (unsigned long long) gnttab_base); + + return gnttab_base; +} + +grant_entry_v1_t *arch_init_gnttab(int nr_grant_frames) +{ + struct xen_add_to_physmap xatp; + struct gnttab_setup_table setup; + xen_pfn_t frames[nr_grant_frames]; + paddr_t gnttab_table; + int i, rc; + + gnttab_table = get_gnttab_base(); + + for (i = 0; i < nr_grant_frames; i++) + { + xatp.domid = DOMID_SELF; + xatp.size = 0; /* Seems to be unused */ + xatp.space = XENMAPSPACE_grant_table; + xatp.idx = i; + xatp.gpfn = (gnttab_table >> PAGE_SHIFT) + i; + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); + BUG_ON(rc != 0); + } + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_grant_frames; + set_xen_guest_handle(setup.frame_list, frames); + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (setup.status != 0) + { + printk("GNTTABOP_setup_table failed; status = %d\n", setup.status); + BUG(); + } + + return to_virt(gnttab_table); +} + +unsigned long map_frame_virt(unsigned long mfn) +{ + return mfn_to_virt(mfn); +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/panic.c xen-4.9.2/extras/mini-os/arch/arm/panic.c --- xen-4.9.0/extras/mini-os/arch/arm/panic.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/panic.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,98 @@ +/****************************************************************************** + * panic.c + * + * Displays a register dump and stack trace for debugging. + * + * Copyright (c) 2014, Thomas Leonard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +extern int irqstack[]; +extern int irqstack_end[]; + +typedef void handler(void); + +extern handler fault_reset; +extern handler fault_undefined_instruction; +extern handler fault_svc; +extern handler fault_prefetch_call; +extern handler fault_prefetch_abort; +extern handler fault_data_abort; + +void dump_registers(int *saved_registers) { + static int in_dump = 0; + int *sp, *stack_top, *x; + char *fault_name; + void *fault_handler; + int i; + + if (in_dump) + { + printk("Crash while in dump_registers! Not generating a second report.\n"); + return; + } + + in_dump = 1; + + fault_handler = (handler *) saved_registers[17]; + if (fault_handler == fault_reset) + fault_name = "reset"; + else if (fault_handler == fault_undefined_instruction) + fault_name = "undefined_instruction"; + else if (fault_handler == fault_svc) + fault_name = "svc"; + else if (fault_handler == fault_prefetch_call) + fault_name = "prefetch_call"; + else if (fault_handler == fault_prefetch_abort) + fault_name = "prefetch_abort"; + else if (fault_handler == fault_data_abort) + fault_name = "data_abort"; + else + fault_name = "unknown fault type!"; + + printk("Fault handler at %p called (%s)\n", fault_handler, fault_name); + + for (i = 0; i < 16; i++) { + printk("r%d = %x\n", i, saved_registers[i]); + } + printk("CPSR = %x\n", saved_registers[16]); + + printk("Stack dump (innermost last)\n"); + sp = (int *) saved_registers[13]; + + if (sp >= _boot_stack && sp <= _boot_stack_end) + stack_top = _boot_stack_end; /* The boot stack */ + else if (sp >= irqstack && sp <= irqstack_end) + stack_top = irqstack_end; /* The IRQ stack */ + else + stack_top = (int *) ((((unsigned long) sp) | (__STACK_SIZE-1)) + 1); /* A normal thread stack */ + + for (x = stack_top - 1; x >= sp; x--) + { + printk(" [%8p] %8x\n", x, *x); + } + printk("End of stack\n"); + + in_dump = 0; +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/sched.c xen-4.9.2/extras/mini-os/arch/arm/sched.c --- xen-4.9.0/extras/mini-os/arch/arm/sched.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/sched.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,47 @@ +#include +#include +#include + +void arm_start_thread(void); + +/* The AAPCS requires the callee (e.g. __arch_switch_threads) to preserve r4-r11. */ +#define CALLEE_SAVED_REGISTERS 8 + +/* Architecture specific setup of thread creation */ +struct thread* arch_create_thread(char *name, void (*function)(void *), + void *data) +{ + struct thread *thread; + + thread = xmalloc(struct thread); + /* We can't use lazy allocation here since the trap handler runs on the stack */ + thread->stack = (char *)alloc_pages(STACK_SIZE_PAGE_ORDER); + thread->name = name; + printk("Thread \"%s\": pointer: 0x%p, stack: 0x%p\n", name, thread, + thread->stack); + + /* Save pointer to the thread on the stack, used by current macro */ + *((unsigned long *)thread->stack) = (unsigned long)thread; + + /* Push the details to pass to arm_start_thread onto the stack. */ + int *sp = (int *) (thread->stack + STACK_SIZE); + *(--sp) = (int) function; + *(--sp) = (int) data; + + /* We leave room for the 8 callee-saved registers which we will + * try to restore on thread switch, even though they're not needed + * for the initial switch. 
*/ + thread->sp = (unsigned long) sp - 4 * CALLEE_SAVED_REGISTERS; + + thread->ip = (unsigned long) arm_start_thread; + + return thread; +} + +void run_idle_thread(void) +{ + __asm__ __volatile__ ("mov sp, %0; bx %1":: + "r"(idle_thread->sp + 4 * CALLEE_SAVED_REGISTERS), + "r"(idle_thread->ip)); + /* Never arrive here! */ +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/setup.c xen-4.9.2/extras/mini-os/arch/arm/setup.c --- xen-4.9.0/extras/mini-os/arch/arm/setup.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/setup.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +shared_info_t *HYPERVISOR_shared_info; + +void *device_tree; + +/* + * INITIAL C ENTRY POINT. + */ +void arch_init(void *dtb_pointer, uint32_t physical_offset) +{ + int r; + + memset(&__bss_start, 0, &_end - &__bss_start); + + physical_address_offset = physical_offset; + + xprintk("Virtual -> physical offset = %x\n", physical_address_offset); + + xprintk("Checking DTB at %p...\n", dtb_pointer); + + if ((r = fdt_check_header(dtb_pointer))) { + xprintk("Invalid DTB from Xen: %s\n", fdt_strerror(r)); + BUG(); + } + device_tree = dtb_pointer; + + /* Map shared_info page */ + HYPERVISOR_shared_info = map_shared_info(NULL); + + get_console(NULL); + get_xenbus(NULL); + + gic_init(); + + start_kernel(); +} + +void +arch_fini(void) +{ +} + +void +arch_do_exit(void) +{ +} diff -Nru xen-4.9.0/extras/mini-os/arch/arm/time.c xen-4.9.2/extras/mini-os/arch/arm/time.c --- xen-4.9.0/extras/mini-os/arch/arm/time.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/arm/time.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,136 @@ +#include +#include +#include +#include +#include +#include +#include + +//#define VTIMER_DEBUG +#ifdef VTIMER_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=vtimer.c, line=%d) " _f , __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
((void)0) +#endif + +/************************************************************************ + * Time functions + *************************************************************************/ + +static uint64_t cntvct_at_init; +static uint32_t counter_freq; + +/* Compute with 96 bit intermediate result: (a*b)/c */ +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { + uint32_t low, high; + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} + +static inline s_time_t ticks_to_ns(uint64_t ticks) +{ + return muldiv64(ticks, SECONDS(1), counter_freq); +} + +static inline uint64_t ns_to_ticks(s_time_t ns) +{ + return muldiv64(ns, counter_freq, SECONDS(1)); +} + +/* Wall-clock time is not currently available on ARM, so this is always zero for now: + * http://wiki.xenproject.org/wiki/Xen_ARM_TODO#Expose_Wallclock_time_to_guests + */ +static struct timespec shadow_ts; + +static inline uint64_t read_virtual_count(void) +{ + uint32_t c_lo, c_hi; + __asm__ __volatile__("mrrc p15, 1, %0, %1, c14":"=r"(c_lo), "=r"(c_hi)); + return (((uint64_t) c_hi) << 32) + c_lo; +} + +/* monotonic_clock(): returns # of nanoseconds passed since time_init() + * Note: This function is required to return accurate + * time even in the absence of multiple timer ticks. + */ +uint64_t monotonic_clock(void) +{ + return ticks_to_ns(read_virtual_count() - cntvct_at_init); +} + +int gettimeofday(struct timeval *tv, void *tz) +{ + uint64_t nsec = monotonic_clock(); + nsec += shadow_ts.tv_nsec; + + tv->tv_sec = shadow_ts.tv_sec; + tv->tv_sec += NSEC_TO_SEC(nsec); + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); + + return 0; +} + +/* Set the timer and mask. */ +void write_timer_ctl(uint32_t value) { + __asm__ __volatile__( + "mcr p15, 0, %0, c14, c3, 1\n" + "isb"::"r"(value)); +} + +void set_vtimer_compare(uint64_t value) { + DEBUG("New CompareValue : %llx\n", value); + + __asm__ __volatile__("mcrr p15, 3, %0, %H0, c14" + ::"r"(value)); + + /* Enable timer and unmask the output signal */ + write_timer_ctl(1); +} + +void unset_vtimer_compare(void) { + /* Disable timer and mask the output signal */ + write_timer_ctl(2); +} + +void block_domain(s_time_t until) +{ + uint64_t until_count = ns_to_ticks(until) + cntvct_at_init; + ASSERT(irqs_disabled()); + if (read_virtual_count() < until_count) + { + set_vtimer_compare(until_count); + __asm__ __volatile__("wfi"); + unset_vtimer_compare(); + + /* Give the IRQ handler a chance to handle whatever woke us up. */ + local_irq_enable(); + local_irq_disable(); + } +} + +void init_time(void) +{ + printk("Initialising timer interface\n"); + + __asm__ __volatile__("mrc p15, 0, %0, c14, c0, 0":"=r"(counter_freq)); + cntvct_at_init = read_virtual_count(); + printk("Virtual Count register is %llx, freq = %d Hz\n", cntvct_at_init, counter_freq); +} + +void fini_time(void) +{ +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/arch.mk xen-4.9.2/extras/mini-os/arch/x86/arch.mk --- xen-4.9.0/extras/mini-os/arch/x86/arch.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/arch.mk 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,28 @@ +# +# Architecture special makerules for x86 family +# (including x86_32, x86_32y and x86_64). 
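+# Each ifeq block below selects compiler/assembler/linker flags for one
+# MINIOS_TARGET_ARCH; x86_64, for example, builds with -mno-red-zone
+# because interrupt handlers run on the interrupted stack and would
+# otherwise trample a red zone below the stack pointer.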
+# + +MINIOS_TARGET_ARCHS := x86_32 x86_64 + +ifeq ($(MINIOS_TARGET_ARCH),x86_32) +ARCH_CFLAGS := -m32 -march=i686 +ARCH_LDFLAGS := -m elf_i386 +ARCH_ASFLAGS := -m32 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(MINIOS_TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(MINIOS_TARGET_ARCH),x86_64) +ARCH_CFLAGS := -m64 -mno-red-zone -fno-reorder-blocks +ARCH_CFLAGS += -fno-asynchronous-unwind-tables +ARCH_ASFLAGS := -m64 +ARCH_LDFLAGS := -m elf_x86_64 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(MINIOS_TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(CONFIG_PARAVIRT),n) +ARCH_LDFLAGS_FINAL := --oformat=elf32-i386 +ARCH_AS_DEPS += x86_hvm.S +endif diff -Nru xen-4.9.0/extras/mini-os/arch/x86/balloon.c xen-4.9.2/extras/mini-os/arch/x86/balloon.c --- xen-4.9.0/extras/mini-os/arch/x86/balloon.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/balloon.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,155 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_BALLOON +#ifdef CONFIG_PARAVIRT +static void p2m_invalidate(unsigned long *list, unsigned long start_idx) +{ + unsigned long idx; + + for ( idx = start_idx; idx < P2M_ENTRIES; idx++ ) + list[idx] = INVALID_P2M_ENTRY; +} + +static inline unsigned long *p2m_l3list(void) +{ + return mfn_to_virt(HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list); +} + +static inline unsigned long *p2m_to_virt(unsigned long p2m) +{ + return ( p2m == INVALID_P2M_ENTRY ) ? 
NULL : mfn_to_virt(p2m); +} + +void arch_remap_p2m(unsigned long max_pfn) +{ + unsigned long pfn, new_p2m; + unsigned long *l3_list, *l2_list, *l1_list; + + l3_list = p2m_l3list(); + l2_list = p2m_to_virt(l3_list[L3_P2M_IDX(max_pfn - 1)]); + l1_list = p2m_to_virt(l2_list[L2_P2M_IDX(max_pfn - 1)]); + + p2m_invalidate(l3_list, L3_P2M_IDX(max_pfn - 1) + 1); + p2m_invalidate(l2_list, L2_P2M_IDX(max_pfn - 1) + 1); + p2m_invalidate(l1_list, L1_P2M_IDX(max_pfn - 1) + 1); + + if ( p2m_pages(nr_max_pages) <= p2m_pages(max_pfn) ) + return; + + new_p2m = alloc_virt_kernel(p2m_pages(nr_max_pages)); + for ( pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES ) + { + map_frame_rw(new_p2m + PAGE_SIZE * (pfn / P2M_ENTRIES), + virt_to_mfn(phys_to_machine_mapping + pfn)); + } + + phys_to_machine_mapping = (unsigned long *)new_p2m; + printk("remapped p2m list to %p\n", phys_to_machine_mapping); +} + +int arch_expand_p2m(unsigned long max_pfn) +{ + unsigned long pfn; + unsigned long *l1_list, *l2_list, *l3_list; + + p2m_chk_pfn(max_pfn - 1); + l3_list = p2m_l3list(); + + for ( pfn = (HYPERVISOR_shared_info->arch.max_pfn + P2M_MASK) & ~P2M_MASK; + pfn < max_pfn; pfn += P2M_ENTRIES ) + { + l2_list = p2m_to_virt(l3_list[L3_P2M_IDX(pfn)]); + if ( !l2_list ) + { + l2_list = (unsigned long*)alloc_page(); + if ( !l2_list ) + return -ENOMEM; + p2m_invalidate(l2_list, 0); + l3_list[L3_P2M_IDX(pfn)] = virt_to_mfn(l2_list); + } + l1_list = p2m_to_virt(l2_list[L2_P2M_IDX(pfn)]); + if ( !l1_list ) + { + l1_list = (unsigned long*)alloc_page(); + if ( !l1_list ) + return -ENOMEM; + p2m_invalidate(l1_list, 0); + l2_list[L2_P2M_IDX(pfn)] = virt_to_mfn(l1_list); + + if ( map_frame_rw((unsigned long)(phys_to_machine_mapping + pfn), + l2_list[L2_P2M_IDX(pfn)]) ) + return -ENOMEM; + } + } + + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; + + /* Make sure the new last page can be mapped. 
*/ + if ( !need_pgt((unsigned long)pfn_to_virt(max_pfn - 1)) ) + return -ENOMEM; + + return 0; +} + +void arch_pfn_add(unsigned long pfn, unsigned long mfn) +{ + mmu_update_t mmu_updates[1]; + pgentry_t *pgt; + int rc; + + phys_to_machine_mapping[pfn] = mfn; + + pgt = need_pgt((unsigned long)pfn_to_virt(pfn)); + ASSERT(pgt); + mmu_updates[0].ptr = virt_to_mach(pgt) | MMU_NORMAL_PT_UPDATE; + mmu_updates[0].val = (pgentry_t)(mfn << PAGE_SHIFT) | + _PAGE_PRESENT | _PAGE_RW; + rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF); + if ( rc < 0 ) + { + printk("ERROR: build_pagetable(): PTE could not be updated\n"); + printk(" mmu_update failed with rc=%d\n", rc); + do_exit(); + } +} +#else +void arch_pfn_add(unsigned long pfn, unsigned long mfn) +{ + pgentry_t *pgt; + + pgt = need_pgt((unsigned long)pfn_to_virt(pfn)); + ASSERT(pgt); + if ( !(*pgt & _PAGE_PSE) ) + *pgt = (pgentry_t)(mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW; +} +#endif + +#endif diff -Nru xen-4.9.0/extras/mini-os/arch/x86/events.c xen-4.9.2/extras/mini-os/arch/x86/events.c --- xen-4.9.0/extras/mini-os/arch/x86/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/events.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,35 @@ +#include +#include +#include + +#if defined(__x86_64__) +char irqstack[2 * STACK_SIZE]; + +static struct pda +{ + int irqcount; /* offset 0 (used in x86_64.S) */ + char *irqstackptr; /* 8 */ +} cpu0_pda; +#endif + +void arch_init_events(void) +{ +#if defined(__x86_64__) + asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + wrmsrl(0xc0000101, (uint64_t)&cpu0_pda); /* 0xc0000101 is MSR_GS_BASE */ + cpu0_pda.irqcount = -1; + cpu0_pda.irqstackptr = (void*) (((unsigned long)irqstack + 2 * STACK_SIZE) + & ~(STACK_SIZE - 1)); +#endif +} + +void arch_unbind_ports(void) +{ +} + +void arch_fini_events(void) +{ +#if defined(__x86_64__) + wrmsrl(0xc0000101, 0); /* 0xc0000101 is MSR_GS_BASE */ +#endif +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/ioremap.c xen-4.9.2/extras/mini-os/arch/x86/ioremap.c --- xen-4.9.0/extras/mini-os/arch/x86/ioremap.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/ioremap.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2009, Netronome Systems, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include +#include +#include + +/* Map a physical address range into virtual address space with provided + * flags. 
Return a virtual address range it is mapped to. */
+static void *__do_ioremap(unsigned long phys_addr, unsigned long size,
+                          unsigned long prot)
+{
+    unsigned long va;
+    unsigned long mfns, mfn;
+    unsigned long num_pages, offset;
+
+    /* allow non page aligned addresses but for mapping we need to align them */
+    offset = (phys_addr & ~PAGE_MASK);
+    num_pages = (offset + size + PAGE_SIZE - 1) / PAGE_SIZE;
+    phys_addr &= PAGE_MASK;
+    mfns = mfn = phys_addr >> PAGE_SHIFT;
+
+    va = (unsigned long)map_frames_ex(&mfns, num_pages, 0, 1, 1,
+                                      DOMID_IO, NULL, prot);
+    return (void *)(va + offset);
+}
+
+void *ioremap(unsigned long phys_addr, unsigned long size)
+{
+    return __do_ioremap(phys_addr, size, IO_PROT);
+}
+
+void *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+{
+    return __do_ioremap(phys_addr, size, IO_PROT_NOCACHE);
+}
+
+/* Un-map the io-remapped region. Currently no list of existing mappings is
+ * maintained, so the caller has to supply the size */
+void iounmap(void *virt_addr, unsigned long size)
+{
+    unsigned long num_pages;
+    unsigned long va = (unsigned long)virt_addr;
+
+    /* work out number of frames to unmap */
+    num_pages = ((va & ~PAGE_MASK) + size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+    unmap_frames(va & PAGE_MASK, num_pages);
+}
+
+
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */
diff -Nru xen-4.9.0/extras/mini-os/arch/x86/iorw.c xen-4.9.2/extras/mini-os/arch/x86/iorw.c
--- xen-4.9.0/extras/mini-os/arch/x86/iorw.c 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/arch/x86/iorw.c 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,35 @@
+#include 
+
+void iowrite8(volatile void* addr, uint8_t val)
+{
+    *((volatile uint8_t*)addr) = val;
+}
+void iowrite16(volatile void* addr, uint16_t val)
+{
+    *((volatile uint16_t*)addr) = val;
+}
+void iowrite32(volatile void* addr, uint32_t val)
+{
+    *((volatile uint32_t*)addr) = val;
+}
+void iowrite64(volatile void* addr, uint64_t val)
+{
+    *((volatile uint64_t*)addr) = val;
+}
+
+uint8_t ioread8(volatile void* addr)
+{
+    return *((volatile uint8_t*) addr);
+}
+uint16_t ioread16(volatile void* addr)
+{
+    return *((volatile uint16_t*) addr);
+}
+uint32_t ioread32(volatile void* addr)
+{
+    return *((volatile uint32_t*) addr);
+}
+uint64_t ioread64(volatile void* addr)
+{
+    return *((volatile uint64_t*) addr);
+}
diff -Nru xen-4.9.0/extras/mini-os/arch/x86/Makefile xen-4.9.2/extras/mini-os/arch/x86/Makefile
--- xen-4.9.0/extras/mini-os/arch/x86/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/arch/x86/Makefile 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,28 @@
+#
+# x86 architecture specific makefiles.
+# It is used for x86_32, x86_32y and x86_64
+#
+
+TOPLEVEL_DIR = $(CURDIR)/../..
+include ../../Config.mk
+
+include ../../minios.mk
+
+# Sources here are all *.c *.S without $(MINIOS_TARGET_ARCH).S
+# This is handled in $(HEAD_ARCH_OBJ)
+ARCH_SRCS := $(sort $(wildcard *.c))
+
+# The objects built from the sources.
+ARCH_OBJS := $(patsubst %.c,$(OBJ_DIR)/%.o,$(ARCH_SRCS))
+
+all: $(OBJ_DIR)/$(ARCH_LIB)
+
+# $(HEAD_ARCH_OBJ) is only built here; it is needed for linking
+# in ../../Makefile.
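+# (Note that ar only collects $(ARCH_OBJS) into the archive below; the
+# head object stays a standalone file that the top-level link consumes
+# directly.)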
+$(OBJ_DIR)/$(ARCH_LIB): $(ARCH_OBJS) $(OBJ_DIR)/$(HEAD_ARCH_OBJ) + $(AR) rv $(OBJ_DIR)/$(ARCH_LIB) $(ARCH_OBJS) + +clean: + rm -f $(OBJ_DIR)/$(ARCH_LIB) $(ARCH_OBJS) $(OBJ_DIR)/$(HEAD_ARCH_OBJ) + rm -f minios-x86_32.lds minios-x86_64.lds + diff -Nru xen-4.9.0/extras/mini-os/arch/x86/minios-x86.lds.S xen-4.9.2/extras/mini-os/arch/x86/minios-x86.lds.S --- xen-4.9.0/extras/mini-os/arch/x86/minios-x86.lds.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/minios-x86.lds.S 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,133 @@ +#if defined(__x86_64__) + +OUTPUT_FORMAT("elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) + +#elif defined(__i386__) +#undef i386 +OUTPUT_FORMAT("elf32-i386") +OUTPUT_ARCH(i386) + +#else +# error Bad architecture to link with +#endif + +ENTRY(_start) +SECTIONS +{ + . = 0x0; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { + *(.rodata) + *(.rodata.*) + } + . = ALIGN(4096); + _erodata = .; + + .note : { + *(.note) + *(.note.*) + } + + /* newlib initialization functions */ +#if defined(__x86_64__) + . = ALIGN(64 / 8); +#else /* __i386 __ */ + . = ALIGN(32 / 8); +#endif + PROVIDE (__preinit_array_start = .); + .preinit_array : { + *(.preinit_array) + } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { + *(.init_array) + } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { + *(.fini_array) + } + PROVIDE (__fini_array_end = .); + + .ctors : { + __CTOR_LIST__ = .; + *(.ctors) + CONSTRUCTORS +#if defined(__x86_64__) + QUAD(0) +#else /* __i386__ */ + LONG(0) +#endif + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + *(.dtors) +#if defined(__x86_64__) + QUAD(0) +#else /* __i386__ */ + LONG(0) +#endif + __DTOR_END__ = .; + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + *(.app.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { + *(.stab) + } + .stabstr 0 : { + *(.stabstr) + } + .stab.excl 0 : { + *(.stab.excl) + } + .stab.exclstr 0 : { + *(.stab.exclstr) + } + .stab.index 0 : { + *(.stab.index) + } + .stab.indexstr 0 : { + *(.stab.indexstr) + } + .comment 0 : { + *(.comment) + } +} + +/* + * Local variables: + * tab-width: 8 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/arch/x86/mm.c xen-4.9.2/extras/mini-os/arch/x86/mm.c --- xen-4.9.0/extras/mini-os/arch/x86/mm.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/mm.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,926 @@ +/* + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: mm.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos + * + * Date: Aug 2003, chages Aug 2005 + * + * Environment: Xen Minimal OS + * Description: memory management related functions + * contains buddy page allocator from Xen. 
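+ *              (initial page-table construction, read-only protection
+ *              of the kernel text, the demand-mapping area and, for PV
+ *              guests, the p2m list)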
+ * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef MM_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + +unsigned long *phys_to_machine_mapping; +unsigned long mfn_zero; +pgentry_t *pt_base; +static unsigned long first_free_pfn; +static unsigned long last_free_pfn; +static unsigned long virt_kernel_area_end = VIRT_KERNEL_AREA; + +extern char stack[]; +extern void page_walk(unsigned long va); + +#ifdef CONFIG_PARAVIRT +struct e820entry e820_map[1] = { + { + .addr = 0, + .size = ULONG_MAX - 1, + .type = E820_RAM + } +}; +unsigned e820_entries = 1; + +void arch_mm_preinit(void *p) +{ + start_info_t *si = p; + + phys_to_machine_mapping = (unsigned long *)si->mfn_list; + pt_base = (pgentry_t *)si->pt_base; + first_free_pfn = PFN_UP(to_phys(pt_base)) + si->nr_pt_frames; + last_free_pfn = si->nr_pages; +} +#else +#include +user_desc gdt[NR_GDT_ENTRIES] = +{ + [GDTE_CS64_DPL0] = INIT_GDTE_SYM(0, 0xfffff, COMMON, CODE, DPL0, R, L), + [GDTE_CS32_DPL0] = INIT_GDTE_SYM(0, 0xfffff, COMMON, CODE, DPL0, R, D), + [GDTE_DS32_DPL0] = INIT_GDTE_SYM(0, 0xfffff, COMMON, DATA, DPL0, B, W), + + [GDTE_CS64_DPL3] = INIT_GDTE_SYM(0, 0xfffff, COMMON, CODE, DPL3, R, L), + [GDTE_CS32_DPL3] = INIT_GDTE_SYM(0, 0xfffff, COMMON, CODE, DPL3, R, D), + [GDTE_DS32_DPL3] = INIT_GDTE_SYM(0, 0xfffff, COMMON, DATA, DPL3, B, W), + + /* [GDTE_TSS] */ + /* [GDTE_TSS + 1] */ +}; + +desc_ptr gdt_ptr = +{ + .limit = sizeof(gdt) - 1, + .base = (unsigned long)&gdt, +}; + +gate_desc idt[256] = { }; + +desc_ptr idt_ptr = +{ + .limit = sizeof(idt) - 1, + .base = (unsigned long)&idt, +}; + +struct e820entry e820_map[E820_MAX]; +unsigned e820_entries; + +static char *e820_types[E820_TYPES] = { + [E820_RAM] = "RAM", + [E820_RESERVED] = "Reserved", + [E820_ACPI] = "ACPI", + [E820_NVS] = "NVS", + [E820_UNUSABLE] = "Unusable", + [E820_PMEM] = "PMEM" +}; + +void arch_mm_preinit(void *p) +{ + long ret; + domid_t domid = DOMID_SELF; + struct xen_memory_map memmap; + int i; + unsigned long pfn, max = 0; + + pt_base = page_table_base; + first_free_pfn = PFN_UP(to_phys(&_end)); + ret = HYPERVISOR_memory_op(XENMEM_current_reservation, &domid); + if ( ret < 0 ) + { + xprintk("could not get memory size\n"); + do_exit(); + } + 
last_free_pfn = ret; + + memmap.nr_entries = E820_MAX; + set_xen_guest_handle(memmap.buffer, e820_map); + ret = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if ( ret < 0 ) + { + xprintk("could not get memory map\n"); + do_exit(); + } + e820_entries = memmap.nr_entries; + + for ( i = 0; i < e820_entries; i++ ) + { + if ( e820_map[i].type != E820_RAM ) + continue; + pfn = (e820_map[i].addr + e820_map[i].size) >> PAGE_SHIFT; + if ( pfn > max ) + max = pfn; + } + + if ( max < last_free_pfn ) + last_free_pfn = max; +} + +void arch_print_memmap(void) +{ + int i; + unsigned long from, to; + char *type; + char buf[12]; + + printk("Memory map:\n"); + for ( i = 0; i < e820_entries; i++ ) + { + if ( e820_map[i].type >= E820_TYPES || !e820_types[e820_map[i].type] ) + { + snprintf(buf, sizeof(buf), "%8x", e820_map[i].type); + type = buf; + } + else + { + type = e820_types[e820_map[i].type]; + } + from = e820_map[i].addr; + to = from + e820_map[i].size - 1; + printk("%012lx-%012lx: %s\n", from, to, type); + } +} +#endif + +/* + * Make pt_pfn a new 'level' page table frame and hook it into the page + * table at offset in previous level MFN (pref_l_mfn). pt_pfn is a guest + * PFN. + */ +static pgentry_t pt_prot[PAGETABLE_LEVELS] = { + L1_PROT, + L2_PROT, + L3_PROT, +#if defined(__x86_64__) + L4_PROT, +#endif +}; + +static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, + unsigned long offset, unsigned long level) +{ + pgentry_t *tab; + unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); +#ifdef CONFIG_PARAVIRT + mmu_update_t mmu_updates[1]; + int rc; +#endif + + DEBUG("Allocating new L%d pt frame for pfn=%lx, " + "prev_l_mfn=%lx, offset=%lx", + level, *pt_pfn, prev_l_mfn, offset); + + /* We need to clear the page, otherwise we might fail to map it + as a page table page */ + memset((void*) pt_page, 0, PAGE_SIZE); + + ASSERT(level >= 1 && level <= PAGETABLE_LEVELS); + +#ifdef CONFIG_PARAVIRT + /* Make PFN a page table page */ + tab = pt_base; +#if defined(__x86_64__) + tab = pte_to_virt(tab[l4_table_offset(pt_page)]); +#endif + tab = pte_to_virt(tab[l3_table_offset(pt_page)]); + + mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + + sizeof(pgentry_t) * l1_table_offset(pt_page); + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | + (pt_prot[level - 1] & ~_PAGE_RW); + + if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 ) + { + printk("ERROR: PTE for new page table page could not be updated\n"); + printk(" mmu_update failed with rc=%d\n", rc); + do_exit(); + } + + /* Hook the new page table page into the hierarchy */ + mmu_updates[0].ptr = + ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | + pt_prot[level]; + + if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 ) + { + printk("ERROR: mmu_update failed with rc=%d\n", rc); + do_exit(); + } +#else + tab = mfn_to_virt(prev_l_mfn); + tab[offset] = (*pt_pfn << PAGE_SHIFT) | pt_prot[level]; +#endif + + *pt_pfn += 1; +} + +/* + * Build the initial pagetable. 
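+ *
+ * Walks from pt_base down one paging level at a time, calling
+ * new_pt_frame() for any table that is not yet present.  Under
+ * CONFIG_PARAVIRT the L1 entries are batched into mmu_update
+ * hypercalls; otherwise 2MB PSE mappings are written directly.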
+ */ +static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn) +{ + unsigned long start_address, end_address; + unsigned long pfn_to_map, pt_pfn = *start_pfn; + pgentry_t *tab = pt_base, page; + unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(pt_base)); + unsigned long offset; +#ifdef CONFIG_PARAVIRT + static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; + int count = 0; + int rc; +#endif + + /* Be conservative: even if we know there will be more pages already + mapped, start the loop at the very beginning. */ + pfn_to_map = *start_pfn; + +#ifdef CONFIG_PARAVIRT + if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) ) + { + printk("WARNING: Mini-OS trying to use Xen virtual space. " + "Truncating memory from %luMB to ", + ((unsigned long)pfn_to_virt(*max_pfn) - + (unsigned long)&_text)>>20); + *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE); + printk("%luMB\n", + ((unsigned long)pfn_to_virt(*max_pfn) - + (unsigned long)&_text)>>20); + } +#else + /* Round up to next 2MB boundary as we are using 2MB pages on HVMlite. */ + pfn_to_map = (pfn_to_map + L1_PAGETABLE_ENTRIES - 1) & + ~(L1_PAGETABLE_ENTRIES - 1); +#endif + + start_address = (unsigned long)pfn_to_virt(pfn_to_map); + end_address = (unsigned long)pfn_to_virt(*max_pfn); + + /* We worked out the virtual memory range to map, now mapping loop */ + printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address); + + while ( start_address < end_address ) + { + tab = pt_base; + pt_mfn = pfn_to_mfn(virt_to_pfn(pt_base)); + +#if defined(__x86_64__) + offset = l4_table_offset(start_address); + /* Need new L3 pt frame */ + if ( !(tab[offset] & _PAGE_PRESENT) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT); +#endif + offset = l3_table_offset(start_address); + /* Need new L2 pt frame */ + if ( !(tab[offset] & _PAGE_PRESENT) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT); + offset = l2_table_offset(start_address); +#ifdef CONFIG_PARAVIRT + /* Need new L1 pt frame */ + if ( !(tab[offset] & _PAGE_PRESENT) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT); + offset = l1_table_offset(start_address); + + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + mmu_updates[count].ptr = + ((pgentry_t)pt_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[count].val = + (pgentry_t)pfn_to_mfn(pfn_to_map) << PAGE_SHIFT | L1_PROT; + count++; + } + pfn_to_map++; + if ( count == L1_PAGETABLE_ENTRIES || + (count && pfn_to_map == *max_pfn) ) + { + rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF); + if ( rc < 0 ) + { + printk("ERROR: build_pagetable(): PTE could not be updated\n"); + printk(" mmu_update failed with rc=%d\n", rc); + do_exit(); + } + count = 0; + } + start_address += PAGE_SIZE; +#else + if ( !(tab[offset] & _PAGE_PRESENT) ) + tab[offset] = (pgentry_t)pfn_to_map << PAGE_SHIFT | + L2_PROT | _PAGE_PSE; + start_address += 1UL << L2_PAGETABLE_SHIFT; +#endif + } + + *start_pfn = pt_pfn; +} + +/* + * Mark portion of the address space read only. 
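+ *
+ * [text, etext) is rounded to page boundaries and every mapping in the
+ * range has _PAGE_RW cleared (batched through mmu_update on PV); the
+ * shared_info page is skipped, since it has to stay writable, and the
+ * TLB is flushed afterwards so no stale writable entry survives.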
+ */ +extern struct shared_info shared_info; +static void set_readonly(void *text, void *etext) +{ + unsigned long start_address = + ((unsigned long) text + PAGE_SIZE - 1) & PAGE_MASK; + unsigned long end_address = (unsigned long) etext; + pgentry_t *tab = pt_base, page; + unsigned long mfn = pfn_to_mfn(virt_to_pfn(pt_base)); + unsigned long offset; + unsigned long page_size = PAGE_SIZE; +#ifdef CONFIG_PARAVIRT + static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; + int count = 0; + int rc; +#endif + + printk("setting %p-%p readonly\n", text, etext); + + while ( start_address + page_size <= end_address ) + { + tab = pt_base; + mfn = pfn_to_mfn(virt_to_pfn(pt_base)); + +#if defined(__x86_64__) + offset = l4_table_offset(start_address); + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif + offset = l3_table_offset(start_address); + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); + offset = l2_table_offset(start_address); + if ( !(tab[offset] & _PAGE_PSE) ) + { + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); + + offset = l1_table_offset(start_address); + } + + if ( start_address != (unsigned long)&shared_info ) + { +#ifdef CONFIG_PARAVIRT + mmu_updates[count].ptr = + ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[count].val = tab[offset] & ~_PAGE_RW; + count++; +#else + tab[offset] &= ~_PAGE_RW; +#endif + } + else + printk("skipped %lx\n", start_address); + + start_address += page_size; + +#ifdef CONFIG_PARAVIRT + if ( count == L1_PAGETABLE_ENTRIES || + start_address + page_size > end_address ) + { + rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF); + if ( rc < 0 ) + { + printk("ERROR: set_readonly(): PTE could not be updated\n"); + do_exit(); + } + count = 0; + } +#else + if ( start_address == (1UL << L2_PAGETABLE_SHIFT) ) + page_size = 1UL << L2_PAGETABLE_SHIFT; +#endif + } + +#ifdef CONFIG_PARAVIRT + { + mmuext_op_t op = { + .cmd = MMUEXT_TLB_FLUSH_ALL, + }; + int count; + HYPERVISOR_mmuext_op(&op, 1, &count, DOMID_SELF); + } +#else + write_cr3((unsigned long)pt_base); +#endif +} + +/* + * get the PTE for virtual address va if it exists. Otherwise NULL. + */ +static pgentry_t *get_pgt(unsigned long va) +{ + unsigned long mfn; + pgentry_t *tab; + unsigned offset; + + tab = pt_base; + mfn = virt_to_mfn(pt_base); + +#if defined(__x86_64__) + offset = l4_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); +#endif + offset = l3_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); + offset = l2_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + if ( tab[offset] & _PAGE_PSE ) + return &tab[offset]; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); + offset = l1_table_offset(va); + return &tab[offset]; +} + + +/* + * return a valid PTE for a given virtual address. If PTE does not exist, + * allocate page-table pages. 
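+ *
+ * This mirrors get_pgt(), except that a table missing at any level is
+ * allocated with alloc_page() and hooked in via new_pt_frame(), so NULL
+ * is returned only if that allocation fails.  For a 2MB PSE mapping the
+ * L2 entry itself is returned.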
+ */ +pgentry_t *need_pgt(unsigned long va) +{ + unsigned long pt_mfn; + pgentry_t *tab; + unsigned long pt_pfn; + unsigned offset; + + tab = pt_base; + pt_mfn = virt_to_mfn(pt_base); + +#if defined(__x86_64__) + offset = l4_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + if ( !pt_pfn ) + return NULL; + new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); +#endif + offset = l3_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + if ( !pt_pfn ) + return NULL; + new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); + offset = l2_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + if ( !pt_pfn ) + return NULL; + new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + if ( tab[offset] & _PAGE_PSE ) + return &tab[offset]; + + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); + + offset = l1_table_offset(va); + return &tab[offset]; +} + +/* + * Reserve an area of virtual address space for mappings and Heap + */ +static unsigned long demand_map_area_start; +static unsigned long demand_map_area_end; +#ifdef HAVE_LIBC +unsigned long heap, brk, heap_mapped, heap_end; +#endif + +void arch_init_demand_mapping_area(void) +{ + demand_map_area_start = VIRT_DEMAND_AREA; + demand_map_area_end = demand_map_area_start + DEMAND_MAP_PAGES * PAGE_SIZE; + printk("Demand map pfns at %lx-%lx.\n", demand_map_area_start, + demand_map_area_end); + +#ifdef HAVE_LIBC + heap_mapped = brk = heap = VIRT_HEAP_AREA; + heap_end = heap_mapped + HEAP_PAGES * PAGE_SIZE; + printk("Heap resides at %lx-%lx.\n", brk, heap_end); +#endif +} + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment) +{ + unsigned long x; + unsigned long y = 0; + + /* Find a properly aligned run of n contiguous frames */ + for ( x = 0; + x <= DEMAND_MAP_PAGES - n; + x = (x + y + 1 + alignment - 1) & ~(alignment - 1) ) + { + unsigned long addr = demand_map_area_start + x * PAGE_SIZE; + pgentry_t *pgt = get_pgt(addr); + for ( y = 0; y < n; y++, addr += PAGE_SIZE ) + { + if ( !(addr & L1_MASK) ) + pgt = get_pgt(addr); + if ( pgt ) + { + if ( *pgt & _PAGE_PRESENT ) + break; + pgt++; + } + } + if ( y == n ) + break; + } + if ( y != n ) + { + printk("Failed to find %ld frames!\n", n); + return 0; + } + return demand_map_area_start + x * PAGE_SIZE; +} + +/* + * Map an array of MFNs contiguously into virtual address space starting at + * va. map f[i*stride]+i*increment for i in 0..n-1. 
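+ * (So with stride=1, incr=0 page i receives mfns[i]; with stride=0,
+ * incr=1 it receives mfns[0]+i, i.e. n consecutive machine frames.
+ * When err is non-NULL the PV path maps one frame per hypercall and
+ * records failures in err[] instead of calling do_exit().)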
+ */
+#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
+int do_map_frames(unsigned long va,
+                  const unsigned long *mfns, unsigned long n,
+                  unsigned long stride, unsigned long incr,
+                  domid_t id, int *err, unsigned long prot)
+{
+    pgentry_t *pgt = NULL;
+    unsigned long done = 0;
+
+    if ( !mfns )
+    {
+        printk("do_map_frames: no mfns supplied\n");
+        return -EINVAL;
+    }
+    DEBUG("va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n",
+          va, n, mfns[0], stride, incr, prot);
+
+    if ( err )
+        memset(err, 0x00, n * sizeof(int));
+    while ( done < n )
+    {
+#ifdef CONFIG_PARAVIRT
+        unsigned long i;
+        int rc;
+        unsigned long todo;
+
+        if ( err )
+            todo = 1;
+        else
+            todo = n - done;
+
+        if ( todo > MAP_BATCH )
+            todo = MAP_BATCH;
+
+        {
+            mmu_update_t mmu_updates[todo];
+
+            for ( i = 0; i < todo; i++, va += PAGE_SIZE, pgt++)
+            {
+                if ( !pgt || !(va & L1_MASK) )
+                    pgt = need_pgt(va);
+                if ( !pgt )
+                    return -ENOMEM;
+
+                mmu_updates[i].ptr = virt_to_mach(pgt) | MMU_NORMAL_PT_UPDATE;
+                mmu_updates[i].val = ((pgentry_t)(mfns[(done + i) * stride] +
+                                                  (done + i) * incr)
+                                      << PAGE_SHIFT) | prot;
+            }
+
+            rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id);
+            if ( rc < 0 )
+            {
+                if ( err )
+                    err[done * stride] = rc;
+                else
+                {
+                    printk("Map %ld (%lx, ...) at %lx failed: %d.\n",
+                           todo, mfns[done * stride] + done * incr, va, rc);
+                    do_exit();
+                }
+            }
+        }
+        done += todo;
+#else
+        if ( !pgt || !(va & L1_MASK) )
+            pgt = need_pgt(va & ~L1_MASK);
+        if ( !pgt )
+            return -ENOMEM;
+
+        ASSERT(!(*pgt & _PAGE_PSE));
+        pgt[l1_table_offset(va)] = (pgentry_t)
+            (((mfns[done * stride] + done * incr) << PAGE_SHIFT) | prot);
+        done++;
+#endif
+    }
+
+    return 0;
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space. Virtual
+ * addresses are allocated from the on demand area.
+ */
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+                    unsigned long stride, unsigned long incr,
+                    unsigned long alignment,
+                    domid_t id, int *err, unsigned long prot)
+{
+    unsigned long va = allocate_ondemand(n, alignment);
+
+    if ( !va )
+        return NULL;
+
+    if ( do_map_frames(va, mfns, n, stride, incr, id, err, prot) )
+        return NULL;
+
+    return (void *)va;
+}
+
+/*
+ * Unmap num_frames frames mapped at virtual address va.
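+ *
+ * Editorial sketch of a round trip through map_frames_ex() above and
+ * unmap_frames() below, assuming a single frame and the L1_PROT flags
+ * used elsewhere in this tree (mfn is a placeholder):
+ *
+ *   void *p = map_frames_ex(&mfn, 1, 1, 0, 1, DOMID_SELF, NULL, L1_PROT);
+ *   if ( p )
+ *       unmap_frames((unsigned long)p, 1);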
+ */
+#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
+int unmap_frames(unsigned long va, unsigned long num_frames)
+{
+#ifdef CONFIG_PARAVIRT
+    int n = UNMAP_BATCH;
+    multicall_entry_t call[n];
+    int ret;
+    int i;
+#else
+    pgentry_t *pgt;
+#endif
+
+    ASSERT(!((unsigned long)va & ~PAGE_MASK));
+
+    DEBUG("va=%p, num=0x%lx\n", va, num_frames);
+
+    while ( num_frames )
+    {
+#ifdef CONFIG_PARAVIRT
+        if ( n > num_frames )
+            n = num_frames;
+
+        for ( i = 0; i < n; i++ )
+        {
+            int arg = 0;
+            /* simply update the PTE for the VA and invalidate TLB */
+            call[i].op = __HYPERVISOR_update_va_mapping;
+            call[i].args[arg++] = va;
+            call[i].args[arg++] = 0;
+#ifdef __i386__
+            call[i].args[arg++] = 0;
+#endif
+            call[i].args[arg++] = UVMF_INVLPG;
+
+            va += PAGE_SIZE;
+        }
+
+        ret = HYPERVISOR_multicall(call, n);
+        if ( ret )
+        {
+            printk("update_va_mapping hypercall failed with rc=%d.\n", ret);
+            return -ret;
+        }
+
+        for ( i = 0; i < n; i++ )
+        {
+            if ( call[i].result )
+            {
+                printk("update_va_mapping failed with rc=%d.\n",
+                       (int)call[i].result);
+                return -(call[i].result);
+            }
+        }
+        num_frames -= n;
+#else
+        pgt = get_pgt(va);
+        if ( pgt )
+        {
+            ASSERT(!(*pgt & _PAGE_PSE));
+            *pgt = 0;
+            invlpg(va);
+        }
+        va += PAGE_SIZE;
+        num_frames--;
+#endif
+    }
+    return 0;
+}
+
+/*
+ * Clear some of the bootstrap memory
+ */
+static void clear_bootstrap(void)
+{
+#ifdef CONFIG_PARAVIRT
+    pte_t nullpte = { };
+    int rc;
+#else
+    pgentry_t *pgt;
+#endif
+
+    /* Use first page as the CoW zero page */
+    memset(&_text, 0, PAGE_SIZE);
+    mfn_zero = virt_to_mfn((unsigned long) &_text);
+#ifdef CONFIG_PARAVIRT
+    if ( (rc = HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG)) )
+        printk("Unable to unmap NULL page. rc=%d\n", rc);
+#else
+    pgt = get_pgt((unsigned long)&_text);
+    *pgt = 0;
+    invlpg((unsigned long)&_text);
+#endif
+}
+
+#ifdef CONFIG_PARAVIRT
+void p2m_chk_pfn(unsigned long pfn)
+{
+    if ( (pfn >> L3_P2M_SHIFT) > 0 )
+    {
+        printk("Error: Too many pfns.\n");
+        do_exit();
+    }
+}
+
+void arch_init_p2m(unsigned long max_pfn)
+{
+    unsigned long *l2_list = NULL, *l3_list;
+    unsigned long pfn;
+
+    p2m_chk_pfn(max_pfn - 1);
+    l3_list = (unsigned long *)alloc_page();
+    for ( pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES )
+    {
+        if ( !(pfn % (P2M_ENTRIES * P2M_ENTRIES)) )
+        {
+            l2_list = (unsigned long*)alloc_page();
+            l3_list[L3_P2M_IDX(pfn)] = virt_to_mfn(l2_list);
+        }
+        l2_list[L2_P2M_IDX(pfn)] = virt_to_mfn(phys_to_machine_mapping + pfn);
+    }
+    HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+        virt_to_mfn(l3_list);
+    HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+
+    arch_remap_p2m(max_pfn);
+}
+#endif
+
+void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
+{
+    unsigned long start_pfn, max_pfn;
+
+    printk("      _text: %p(VA)\n", &_text);
+    printk("     _etext: %p(VA)\n", &_etext);
+    printk("   _erodata: %p(VA)\n", &_erodata);
+    printk("     _edata: %p(VA)\n", &_edata);
+    printk("stack start: %p(VA)\n", stack);
+    printk("       _end: %p(VA)\n", &_end);
+
+    /* First page follows page table pages.
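+     * (start_pfn and max_pfn are seeded from first_free_pfn/last_free_pfn
+     * just below, and max_pfn is then clamped so the range stays within
+     * MAX_MEM_SIZE.)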
*/ + start_pfn = first_free_pfn; + max_pfn = last_free_pfn; + + if ( max_pfn >= MAX_MEM_SIZE / PAGE_SIZE ) + max_pfn = MAX_MEM_SIZE / PAGE_SIZE - 1; + + printk(" start_pfn: %lx\n", start_pfn); + printk(" max_pfn: %lx\n", max_pfn); + + build_pagetable(&start_pfn, &max_pfn); + clear_bootstrap(); + set_readonly(&_text, &_erodata); + + *start_pfn_p = start_pfn; + *max_pfn_p = max_pfn; + +#ifndef CONFIG_PARAVIRT +#ifdef __x86_64__ + BUILD_BUG_ON(l4_table_offset(VIRT_KERNEL_AREA) != 1 || + l3_table_offset(VIRT_KERNEL_AREA) != 0 || + l2_table_offset(VIRT_KERNEL_AREA) != 0); +#else + BUILD_BUG_ON(l3_table_offset(VIRT_KERNEL_AREA) != 0 || + l2_table_offset(VIRT_KERNEL_AREA) == 0); +#endif +#endif +} + +grant_entry_v1_t *arch_init_gnttab(int nr_grant_frames) +{ + struct gnttab_setup_table setup; + unsigned long frames[nr_grant_frames]; + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_grant_frames; + set_xen_guest_handle(setup.frame_list, frames); + + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + return map_frames(frames, nr_grant_frames); +} + +unsigned long alloc_virt_kernel(unsigned n_pages) +{ + unsigned long addr; + + addr = virt_kernel_area_end; + virt_kernel_area_end += PAGE_SIZE * n_pages; + ASSERT(virt_kernel_area_end <= VIRT_DEMAND_AREA); + + return addr; +} + +unsigned long map_frame_virt(unsigned long mfn) +{ + unsigned long addr; + + addr = alloc_virt_kernel(1); + if ( map_frame_rw(addr, mfn) ) + return 0; + + return addr; +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/sched.c xen-4.9.2/extras/mini-os/arch/x86/sched.c --- xen-4.9.0/extras/mini-os/arch/x86/sched.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/sched.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,167 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: sched.c + * Author: Grzegorz Milos + * Changes: Robert Kaiser + * + * Date: Aug 2005 + * + * Environment: Xen Minimal OS + * Description: simple scheduler for Mini-Os + * + * The scheduler is non-preemptive (cooperative), and schedules according + * to Round Robin algorithm. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef SCHED_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=sched.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + + +void dump_stack(struct thread *thread) +{ + unsigned long *bottom = (unsigned long *)(thread->stack + STACK_SIZE); + unsigned long *pointer = (unsigned long *)thread->sp; + int count; + if(thread == current) + { +#ifdef __i386__ + asm("movl %%esp,%0" + : "=r"(pointer)); +#else + asm("movq %%rsp,%0" + : "=r"(pointer)); +#endif + } + printk("The stack for \"%s\"\n", thread->name); + for(count = 0; count < 25 && pointer < bottom; count ++) + { + printk("[0x%p] 0x%lx\n", pointer, *pointer); + pointer++; + } + + if(pointer < bottom) printk(" ... continues.\n"); +} + +/* Gets run when a new thread is scheduled the first time ever, + defined in x86_[32/64].S */ +extern void thread_starter(void); + +/* Pushes the specified value onto the stack of the specified thread */ +static void stack_push(struct thread *thread, unsigned long value) +{ + thread->sp -= sizeof(unsigned long); + *((unsigned long *)thread->sp) = value; +} + +/* Architecture specific setup of thread creation */ +struct thread* arch_create_thread(char *name, void (*function)(void *), + void *data) +{ + struct thread *thread; + + thread = xmalloc(struct thread); + /* We can't use lazy allocation here since the trap handler runs on the stack */ + thread->stack = (char *)alloc_pages(STACK_SIZE_PAGE_ORDER); + thread->name = name; + printk("Thread \"%s\": pointer: 0x%p, stack: 0x%p\n", name, thread, + thread->stack); + + thread->sp = (unsigned long)thread->stack + STACK_SIZE; + /* Save pointer to the thread on the stack, used by current macro */ + *((unsigned long *)thread->stack) = (unsigned long)thread; + + /* Must ensure that (%rsp + 8) is 16-byte aligned at the start of thread_starter. */ + thread->sp -= sizeof(unsigned long); + + stack_push(thread, (unsigned long) function); + stack_push(thread, (unsigned long) data); + thread->ip = (unsigned long) thread_starter; + return thread; +} + +void run_idle_thread(void) +{ + /* Switch stacks and run the thread */ +#if defined(__i386__) + __asm__ __volatile__("mov %0,%%esp\n\t" + "push %1\n\t" + "ret" + :"=m" (idle_thread->sp) + :"m" (idle_thread->ip)); +#elif defined(__x86_64__) + __asm__ __volatile__("mov %0,%%rsp\n\t" + "push %1\n\t" + "ret" + :"=m" (idle_thread->sp) + :"m" (idle_thread->ip)); +#endif +} + +unsigned long __local_irq_save(void) +{ + unsigned long flags; + + local_irq_save(flags); + return flags; +} + +void __local_irq_restore(unsigned long flags) +{ + local_irq_restore(flags); +} + +unsigned long __local_save_flags(void) +{ + unsigned long flags; + + local_save_flags(flags); + return flags; +} + +void __local_irq_disable(void) +{ + local_irq_disable(); +} + +void __local_irq_enable(void) +{ + local_irq_enable(); +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/setup.c xen-4.9.2/extras/mini-os/arch/x86/setup.c --- xen-4.9.0/extras/mini-os/arch/x86/setup.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/setup.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,224 @@ +/****************************************************************************** + * common.c + * + * Common stuff special to x86 goes here. 
+ * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include /* for printk, memcpy */ +#include +#include +#include +#include + +#ifdef CONFIG_PARAVIRT +/* + * This structure contains start-of-day info, such as pagetable base pointer, + * address of the shared_info structure, and things like that. + */ +union start_info_union start_info_union; +#endif + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +shared_info_t *HYPERVISOR_shared_info; + +/* + * Just allocate the kernel stack here. SS:ESP is set up to point here + * in head.S. + */ +char stack[2*STACK_SIZE]; + +extern char shared_info[PAGE_SIZE]; + +#if defined(__x86_64__) +#define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif + +static inline void fpu_init(void) { + asm volatile("fninit"); +} + +#ifdef __SSE__ +static inline void sse_init(void) { + unsigned long status = 0x1f80; + asm volatile("ldmxcsr %0" : : "m" (status)); +} +#else +#define sse_init() +#endif + +#ifdef CONFIG_PARAVIRT +#define hpc_init() + +shared_info_t *map_shared_info(void *p) +{ + int rc; + start_info_t *si = p; + unsigned long pa = si->shared_info; + + if ( (rc = HYPERVISOR_update_va_mapping((unsigned long)shared_info, + __pte(pa | 7), UVMF_INVLPG)) ) + { + printk("Failed to map shared_info!! 
rc=%d\n", rc); + do_exit(); + } + return (shared_info_t *)shared_info; +} + +static void get_cmdline(void *p) +{ + start_info_t *si = p; + + strncpy(cmdline, (char *)si->cmd_line, MAX_CMDLINE_SIZE - 1); +} + +static void print_start_of_day(void *p) +{ + start_info_t *si = p; + + printk("Xen Minimal OS (pv)!\n"); + printk(" start_info: %p(VA)\n", si); + printk(" nr_pages: 0x%lx\n", si->nr_pages); + printk(" shared_inf: 0x%08lx(MA)\n", si->shared_info); + printk(" pt_base: %p(VA)\n", (void *)si->pt_base); + printk("nr_pt_frames: 0x%lx\n", si->nr_pt_frames); + printk(" mfn_list: %p(VA)\n", (void *)si->mfn_list); + printk(" mod_start: 0x%lx(VA)\n", si->mod_start); + printk(" mod_len: %lu\n", si->mod_len); + printk(" flags: 0x%x\n", (unsigned int)si->flags); + printk(" cmd_line: %s\n", cmdline); + printk(" stack: %p-%p\n", stack, stack + sizeof(stack)); +} +#else +static void hpc_init(void) +{ + uint32_t eax, ebx, ecx, edx, base; + + for ( base = XEN_CPUID_FIRST_LEAF; + base < XEN_CPUID_FIRST_LEAF + 0x10000; base += 0x100 ) + { + cpuid(base, &eax, &ebx, &ecx, &edx); + + if ( (ebx == XEN_CPUID_SIGNATURE_EBX) && + (ecx == XEN_CPUID_SIGNATURE_ECX) && + (edx == XEN_CPUID_SIGNATURE_EDX) && + ((eax - base) >= 2) ) + break; + } + + cpuid(base + 2, &eax, &ebx, &ecx, &edx); + wrmsrl(ebx, (unsigned long)&hypercall_page); + barrier(); +} + +static void get_cmdline(void *p) +{ + struct hvm_start_info *si = p; + + if ( si->cmdline_paddr ) + strncpy(cmdline, to_virt(si->cmdline_paddr), MAX_CMDLINE_SIZE - 1); +} + +static void print_start_of_day(void *p) +{ + struct hvm_start_info *si = p; + + printk("Xen Minimal OS (hvm)!\n"); + printk(" start_info: %p(VA)\n", si); + printk(" shared_inf: %p(VA)\n", HYPERVISOR_shared_info); + printk(" modlist: 0x%lx(PA)\n", (unsigned long)si->modlist_paddr); + printk(" nr_modules: %u\n", si->nr_modules); + printk(" flags: 0x%x\n", (unsigned int)si->flags); + printk(" cmd_line: %s\n", cmdline); + printk(" stack: %p-%p\n", stack, stack + sizeof(stack)); + arch_print_memmap(); +} +#endif + +/* + * INITIAL C ENTRY POINT. + */ +void +arch_init(void *par) +{ + static char hello[] = "Bootstrapping...\n"; + + hpc_init(); + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello); + + trap_init(); + + /*Initialize floating point unit */ + fpu_init(); + + /* Initialize SSE */ + sse_init(); + + /* Setup memory management info from start_info. */ + arch_mm_preinit(par); + + /* WARN: don't do printk before here, it uses information from + shared_info. Use xprintk instead. */ + get_console(par); + get_xenbus(par); + get_cmdline(par); + + /* Grab the shared_info pointer and put it in a safe place. 
*/ + HYPERVISOR_shared_info = map_shared_info(par); + + /* print out some useful information */ + print_start_of_day(par); + +#ifdef CONFIG_PARAVIRT + memcpy(&start_info, par, sizeof(start_info)); +#endif + + start_kernel(); +} + +void +arch_fini(void) +{ + /* Reset traps */ + trap_fini(); + +#ifdef __i386__ + HYPERVISOR_set_callbacks(0, 0, 0, 0); +#else + HYPERVISOR_set_callbacks(0, 0, 0); +#endif +} + +void +arch_do_exit(void) +{ + stack_walk(); +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/testbuild/all-no xen-4.9.2/extras/mini-os/arch/x86/testbuild/all-no --- xen-4.9.0/extras/mini-os/arch/x86/testbuild/all-no 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/testbuild/all-no 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,18 @@ +CONFIG_PARAVIRT = n +CONFIG_START_NETWORK = n +CONFIG_SPARSE_BSS = n +CONFIG_QEMU_XS_ARGS = n +CONFIG_TEST = n +CONFIG_PCIFRONT = n +CONFIG_BLKFRONT = n +CONFIG_TPMFRONT = n +CONFIG_TPM_TIS = n +CONFIG_TPMBACK = n +CONFIG_NETFRONT = n +CONFIG_FBFRONT = n +CONFIG_KBDFRONT = n +CONFIG_CONSFRONT = n +CONFIG_XENBUS = n +CONFIG_XC = n +CONFIG_LWIP = n +CONFIG_BALLOON = n diff -Nru xen-4.9.0/extras/mini-os/arch/x86/testbuild/all-yes xen-4.9.2/extras/mini-os/arch/x86/testbuild/all-yes --- xen-4.9.0/extras/mini-os/arch/x86/testbuild/all-yes 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/testbuild/all-yes 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,19 @@ +CONFIG_PARAVIRT = y +CONFIG_START_NETWORK = y +CONFIG_SPARSE_BSS = y +CONFIG_QEMU_XS_ARGS = y +CONFIG_TEST = y +CONFIG_PCIFRONT = y +CONFIG_BLKFRONT = y +CONFIG_TPMFRONT = y +CONFIG_TPM_TIS = y +CONFIG_TPMBACK = y +CONFIG_NETFRONT = y +CONFIG_FBFRONT = y +CONFIG_KBDFRONT = y +CONFIG_CONSFRONT = y +CONFIG_XENBUS = y +CONFIG_XC = y +# LWIP is special: it needs support from outside +CONFIG_LWIP = n +CONFIG_BALLOON = y diff -Nru xen-4.9.0/extras/mini-os/arch/x86/testbuild/balloon xen-4.9.2/extras/mini-os/arch/x86/testbuild/balloon --- xen-4.9.0/extras/mini-os/arch/x86/testbuild/balloon 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/testbuild/balloon 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,2 @@ +CONFIG_PARAVIRT = n +CONFIG_BALLOON = y diff -Nru xen-4.9.0/extras/mini-os/arch/x86/testbuild/newxen xen-4.9.2/extras/mini-os/arch/x86/testbuild/newxen --- xen-4.9.0/extras/mini-os/arch/x86/testbuild/newxen 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/testbuild/newxen 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1 @@ +XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__ diff -Nru xen-4.9.0/extras/mini-os/arch/x86/testbuild/newxen-yes xen-4.9.2/extras/mini-os/arch/x86/testbuild/newxen-yes --- xen-4.9.0/extras/mini-os/arch/x86/testbuild/newxen-yes 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/testbuild/newxen-yes 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,20 @@ +CONFIG_PARAVIRT = y +CONFIG_START_NETWORK = y +CONFIG_SPARSE_BSS = y +CONFIG_QEMU_XS_ARGS = y +CONFIG_TEST = y +CONFIG_PCIFRONT = y +CONFIG_BLKFRONT = y +CONFIG_TPMFRONT = y +CONFIG_TPM_TIS = y +CONFIG_TPMBACK = y +CONFIG_NETFRONT = y +CONFIG_FBFRONT = y +CONFIG_KBDFRONT = y +CONFIG_CONSFRONT = y +CONFIG_XENBUS = y +CONFIG_XC = y +# LWIP is special: it needs support from outside +CONFIG_LWIP = n +CONFIG_BALLOON = y +XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__ diff -Nru xen-4.9.0/extras/mini-os/arch/x86/time.c xen-4.9.2/extras/mini-os/arch/x86/time.c --- xen-4.9.0/extras/mini-os/arch/x86/time.c 1970-01-01 
00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/arch/x86/time.c 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,246 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Keir Fraser - University of Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ * (C) 2006 - Robert Kaiser - FH Wiesbaden
+ ****************************************************************************
+ *
+ * File: time.c
+ * Author: Rolf Neugebauer and Keir Fraser
+ * Changes: Grzegorz Milos
+ *
+ * Description: Simple time and timer functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/************************************************************************
+ * Time functions
+ *************************************************************************/
+
+/* These are periodically updated in shared_info, and then copied here. */
+struct shadow_time_info {
+    uint64_t tsc_timestamp; /* TSC at last update of time vals. */
+    uint64_t system_timestamp; /* Time, in nanosecs, since boot. */
+    uint32_t tsc_to_nsec_mul;
+    uint32_t tsc_to_usec_mul;
+    int tsc_shift;
+    uint32_t version;
+};
+static struct timespec shadow_ts;
+static uint32_t shadow_ts_version;
+
+static struct shadow_time_info shadow;
+
+
+#ifndef rmb
+#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#endif
+
+#define HANDLE_USEC_OVERFLOW(_tv) \
+    do { \
+        while ( (_tv)->tv_usec >= 1000000 ) \
+        { \
+            (_tv)->tv_usec -= 1000000; \
+            (_tv)->tv_sec++; \
+        } \
+    } while ( 0 )
+
+static inline int time_values_up_to_date(void)
+{
+    struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_info[0].time;
+
+    return (shadow.version == src->version);
+}
+
+static inline int wc_values_up_to_date(void)
+{
+    shared_info_t *s = HYPERVISOR_shared_info;
+
+    return (shadow_ts_version == s->wc_version);
+}
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
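+ *
+ * Editorial example: with shift = 0 the result is (delta * mul_frac) >> 32,
+ * i.e. delta scaled by the fixed-point fraction mul_frac / 2^32; Xen
+ * chooses tsc_to_system_mul and tsc_shift so that this converts TSC ticks
+ * into nanoseconds.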
+ */ +static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; +#ifdef __i386__ + uint32_t tmp1, tmp2; +#endif + + if ( shift < 0 ) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "add %4,%%eax ; " + "xor %5,%5 ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); +#else + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((uint64_t)mul_frac) ); +#endif + + return product; +} + + +static unsigned long get_nsec_offset(void) +{ + uint64_t now, delta; + rdtscll(now); + delta = now - shadow.tsc_timestamp; + return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift); +} + + +static void get_time_values_from_xen(void) +{ + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_info[0].time; + + do { + shadow.version = src->version; + rmb(); + shadow.tsc_timestamp = src->tsc_timestamp; + shadow.system_timestamp = src->system_time; + shadow.tsc_to_nsec_mul = src->tsc_to_system_mul; + shadow.tsc_shift = src->tsc_shift; + rmb(); + } + while ((src->version & 1) | (shadow.version ^ src->version)); + + shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000; +} + + + + +/* monotonic_clock(): returns # of nanoseconds passed since time_init() + * Note: This function is required to return accurate + * time even in the absence of multiple timer ticks. + */ +uint64_t monotonic_clock(void) +{ + uint64_t time; + uint32_t local_time_version; + + do { + local_time_version = shadow.version; + rmb(); + time = shadow.system_timestamp + get_nsec_offset(); + if (!time_values_up_to_date()) + get_time_values_from_xen(); + rmb(); + } while (local_time_version != shadow.version); + + return time; +} + +static void update_wallclock(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + do { + shadow_ts_version = s->wc_version; + rmb(); + shadow_ts.tv_sec = s->wc_sec; + shadow_ts.tv_nsec = s->wc_nsec; + rmb(); + } + while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version)); +} + + +int gettimeofday(struct timeval *tv, void *tz) +{ + uint64_t nsec = monotonic_clock(); + + if (!wc_values_up_to_date()) + update_wallclock(); + + nsec += shadow_ts.tv_nsec; + + tv->tv_sec = shadow_ts.tv_sec; + tv->tv_sec += NSEC_TO_SEC(nsec); + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); + + return 0; +} + + +void block_domain(s_time_t until) +{ + ASSERT(irqs_disabled()); + if(monotonic_clock() < until) + { + HYPERVISOR_set_timer_op(until); +#ifdef CONFIG_PARAVIRT + HYPERVISOR_sched_op(SCHEDOP_block, 0); +#else + local_irq_enable(); + asm volatile ( "hlt" : : : "memory" ); +#endif + local_irq_disable(); + HYPERVISOR_set_timer_op(0); + } +} + +static void timer_handler(evtchn_port_t ev, struct pt_regs *regs, void *ign) +{ + HYPERVISOR_set_timer_op(monotonic_clock() + MILLISECS(1)); +} + + + +static evtchn_port_t port; +void init_time(void) +{ + printk("Initialising timer interface\n"); + port = bind_virq(VIRQ_TIMER, &timer_handler, NULL); + unmask_evtchn(port); +} + +void fini_time(void) +{ + /* Clear any pending timer */ + HYPERVISOR_set_timer_op(0); + unbind_evtchn(port); +} diff -Nru xen-4.9.0/extras/mini-os/arch/x86/traps.c xen-4.9.2/extras/mini-os/arch/x86/traps.c --- xen-4.9.0/extras/mini-os/arch/x86/traps.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/traps.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,429 @@ + 
+#include +#include +#include +#include +#include +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. + */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + + +void dump_regs(struct pt_regs *regs) +{ + printk("Thread: %s\n", current ? current->name : "*NONE*"); +#ifdef __i386__ + printk("EIP: %lx, EFLAGS %lx.\n", regs->eip, regs->eflags); + printk("EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx EAX: %08lx\n", + regs->esi, regs->edi, regs->ebp, regs->eax); + printk("DS: %04x ES: %04x orig_eax: %08lx, eip: %08lx\n", + regs->xds, regs->xes, regs->orig_eax, regs->eip); + printk("CS: %04x EFLAGS: %08lx esp: %08lx ss: %04x\n", + regs->xcs, regs->eflags, regs->esp, regs->xss); +#else + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", + regs->ss, regs->rsp, regs->eflags); + printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP: %016lx R08: %016lx R09: %016lx\n", + regs->rbp, regs->r8, regs->r9); + printk("R10: %016lx R11: %016lx R12: %016lx\n", + regs->r10, regs->r11, regs->r12); + printk("R13: %016lx R14: %016lx R15: %016lx\n", + regs->r13, regs->r14, regs->r15); +#endif +} + +static void do_trap(int trapnr, char *str, struct pt_regs * regs, unsigned long error_code) +{ + printk("FATAL: Unhandled Trap %d (%s), error code=0x%lx\n", trapnr, str, error_code); + printk("Regs address %p\n", regs); + dump_regs(regs); + do_exit(); +} + +#define DO_ERROR(trapnr, str, name) \ +void do_##name(struct pt_regs * regs, unsigned long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \ +void do_##name(struct pt_regs * regs, unsigned long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, "int3", int3) +DO_ERROR( 4, "overflow", overflow) +DO_ERROR( 5, "bounds", bounds) +DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, "device not available", device_not_available) +DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, "invalid TSS", invalid_TSS) +DO_ERROR(11, "segment not present", segment_not_present) +DO_ERROR(12, "stack segment", stack_segment) +DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, "machine check", machine_check) + +void page_walk(unsigned long virt_address) +{ + pgentry_t *tab = pt_base, page; + unsigned long addr = virt_address; + printk("Pagetable walk from virt %lx, base %p:\n", virt_address, pt_base); + +#if defined(__x86_64__) + page = tab[l4_table_offset(addr)]; + tab = pte_to_virt(page); + printk(" L4 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l4_table_offset(addr)); +#endif + page = tab[l3_table_offset(addr)]; + tab = 
pte_to_virt(page); + printk(" L3 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l3_table_offset(addr)); + page = tab[l2_table_offset(addr)]; + tab = pte_to_virt(page); + printk(" L2 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l2_table_offset(addr)); + + page = tab[l1_table_offset(addr)]; + printk(" L1 = %"PRIpte" [offset = %lx]\n", page, l1_table_offset(addr)); + +} + +static int handle_cow(unsigned long addr) { + pgentry_t *tab = pt_base, page; + unsigned long new_page; +#ifdef CONFIG_PARAVIRT + int rc; +#endif + +#if defined(__x86_64__) + page = tab[l4_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + tab = pte_to_virt(page); +#endif + page = tab[l3_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + tab = pte_to_virt(page); + + page = tab[l2_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + if ( page & _PAGE_PSE ) + return 0; + tab = pte_to_virt(page); + + page = tab[l1_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + /* Only support CoW for the zero page. */ + if (PHYS_PFN(page) != mfn_zero) + return 0; + + new_page = alloc_pages(0); + memset((void*) new_page, 0, PAGE_SIZE); + +#ifdef CONFIG_PARAVIRT + rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG); + if (!rc) + return 1; + + printk("Map zero page to %lx failed: %d.\n", addr, rc); + return 0; +#else + tab[l1_table_offset(addr)] = virt_to_mach(new_page) | L1_PROT; + invlpg(addr); + return 1; +#endif +} + +static void do_stack_walk(unsigned long frame_base) +{ + unsigned long *frame = (void*) frame_base; + printk("base is %#lx ", frame_base); + printk("caller is %#lx\n", frame[1]); + if (frame[0]) + do_stack_walk(frame[0]); +} + +void stack_walk(void) +{ + unsigned long bp; +#ifdef __x86_64__ + asm("movq %%rbp, %0":"=r"(bp)); +#else + asm("movl %%ebp, %0":"=r"(bp)); +#endif + do_stack_walk(bp); +} + +static void dump_mem(unsigned long addr) +{ + unsigned long i; + if (addr < PAGE_SIZE) + return; + + for (i = ((addr)-16 ) & ~15; i < (((addr)+48 ) & ~15); i++) + { + if (!(i%16)) + printk("\n%lx:", i); + printk(" %02x", *(unsigned char *)i); + } + printk("\n"); +} + +static int handling_pg_fault = 0; + +void do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + unsigned long addr = read_cr2(); + struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_crash }; + + if ((error_code & TRAP_PF_WRITE) && handle_cow(addr)) + return; + + /* If we are already handling a page fault, and got another one + that means we faulted in pagetable walk. Continuing here would cause + a recursive fault */ + if(handling_pg_fault == 1) + { + printk("Page fault in pagetable walk (access to invalid memory?).\n"); + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + } + handling_pg_fault++; + barrier(); + +#if defined(__x86_64__) + printk("Page fault at linear address %lx, rip %lx, regs %p, sp %lx, our_sp %p, code %lx\n", + addr, regs->rip, regs, regs->rsp, &addr, error_code); +#else + printk("Page fault at linear address %lx, eip %lx, regs %p, sp %lx, our_sp %p, code %lx\n", + addr, regs->eip, regs, regs->esp, &addr, error_code); +#endif + + dump_regs(regs); +#if defined(__x86_64__) + do_stack_walk(regs->rbp); + dump_mem(regs->rsp); + dump_mem(regs->rbp); + dump_mem(regs->rip); +#else + do_stack_walk(regs->ebp); + dump_mem(regs->esp); + dump_mem(regs->ebp); + dump_mem(regs->eip); +#endif + page_walk(addr); + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + /* We should never get here ... 
but still */
+    handling_pg_fault--;
+}
+
+void do_general_protection(struct pt_regs *regs, long error_code)
+{
+    struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_crash };
+#ifdef __i386__
+    printk("GPF eip: %lx, error_code=%lx\n", regs->eip, error_code);
+#else
+    printk("GPF rip: %lx, error_code=%lx\n", regs->rip, error_code);
+#endif
+    dump_regs(regs);
+#if defined(__x86_64__)
+    do_stack_walk(regs->rbp);
+    dump_mem(regs->rsp);
+    dump_mem(regs->rbp);
+    dump_mem(regs->rip);
+#else
+    do_stack_walk(regs->ebp);
+    dump_mem(regs->esp);
+    dump_mem(regs->ebp);
+    dump_mem(regs->eip);
+#endif
+    HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+}
+
+
+void do_debug(struct pt_regs * regs)
+{
+    printk("Debug exception\n");
+#define TF_MASK 0x100
+    regs->eflags &= ~TF_MASK;
+    dump_regs(regs);
+    do_exit();
+}
+
+void do_coprocessor_error(struct pt_regs * regs)
+{
+    printk("Copro error\n");
+    dump_regs(regs);
+    do_exit();
+}
+
+void simd_math_error(void *eip)
+{
+    printk("SIMD error\n");
+}
+
+void do_simd_coprocessor_error(struct pt_regs * regs)
+{
+    printk("SIMD copro error\n");
+}
+
+void do_spurious_interrupt_bug(struct pt_regs * regs)
+{
+}
+
+/* Assembler interface fns in entry.S. */
+void hypervisor_callback(void);
+void failsafe_callback(void);
+
+#ifdef CONFIG_PARAVIRT
+/*
+ * Submit a virtual IDT to the hypervisor. This consists of tuples
+ * (interrupt vector, privilege ring, CS:EIP of handler).
+ * The 'privilege ring' field specifies the least-privileged ring that
+ * can trap to that vector using a software-interrupt instruction (INT).
+ */
+static trap_info_t trap_table[] = {
+    { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
+    { 1, 0, __KERNEL_CS, (unsigned long)debug },
+    { 3, 3, __KERNEL_CS, (unsigned long)int3 },
+    { 4, 3, __KERNEL_CS, (unsigned long)overflow },
+    { 5, 3, __KERNEL_CS, (unsigned long)bounds },
+    { 6, 0, __KERNEL_CS, (unsigned long)invalid_op },
+    { 7, 0, __KERNEL_CS, (unsigned long)device_not_available },
+    { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
+    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
+    { 0, 0, 0, 0 }
+};
+
+
+
+void trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);
+
+#ifdef __i386__
+    HYPERVISOR_set_callbacks(
+        __KERNEL_CS, (unsigned long)hypervisor_callback,
+        __KERNEL_CS, (unsigned long)failsafe_callback);
+#else
+    HYPERVISOR_set_callbacks(
+        (unsigned long)hypervisor_callback,
+        (unsigned long)failsafe_callback, 0);
+#endif
+}
+
+void trap_fini(void)
+{
+    HYPERVISOR_set_trap_table(NULL);
+}
+#else
+
+#define INTR_STACK_SIZE PAGE_SIZE
+static uint8_t intr_stack[INTR_STACK_SIZE] __attribute__((aligned(16)));
+
+hw_tss tss __attribute__((aligned(16))) =
+{
+#if defined(__i386__)
+    .esp0 = (unsigned long)&intr_stack[INTR_STACK_SIZE],
+    .ss0 = __KERN_DS,
+#elif defined(__x86_64__)
+    .rsp0 = (unsigned long)&intr_stack[INTR_STACK_SIZE],
+#endif
+    .iopb = X86_TSS_INVALID_IO_BITMAP,
+};
+
+static void setup_gate(unsigned int entry, void *addr, unsigned int dpl)
+{
+    idt[entry].offset0 = (unsigned long)addr & 0xffff;
+    idt[entry].selector = __KERN_CS;
+    idt[entry]._r0 = 0;
+    idt[entry].type = 14;
+    idt[entry].s = 0;
+    idt[entry].dpl = dpl;
+    idt[entry].p = 1;
+    idt[entry].offset1 = ((unsigned long)addr >> 16) & 0xffff;
+#if defined(__x86_64__)
+    idt[entry].ist = 0;
+    idt[entry].offset2 = ((unsigned long)addr >> 32) & 0xffffffffu;
+    idt[entry]._r1 = 0;
+#endif
+}
+
+void trap_init(void)
+{
+    setup_gate(TRAP_divide_error, &divide_error, 0);
+    setup_gate(TRAP_debug, &debug, 0);
+    setup_gate(TRAP_int3, &int3, 3);
+    setup_gate(TRAP_overflow, &overflow, 3);
+    setup_gate(TRAP_bounds, &bounds, 0);
+    setup_gate(TRAP_invalid_op, &invalid_op, 0);
+    setup_gate(TRAP_no_device, &device_not_available, 0);
+    setup_gate(TRAP_copro_seg, &coprocessor_segment_overrun, 0);
+    setup_gate(TRAP_invalid_tss, &invalid_TSS, 0);
+    setup_gate(TRAP_no_segment, &segment_not_present, 0);
+    setup_gate(TRAP_stack_error, &stack_segment, 0);
+    setup_gate(TRAP_gp_fault, &general_protection, 0);
+    setup_gate(TRAP_page_fault, &page_fault, 0);
+    setup_gate(TRAP_spurious_int, &spurious_interrupt_bug, 0);
+    setup_gate(TRAP_copro_error, &coprocessor_error, 0);
+    setup_gate(TRAP_alignment_check, &alignment_check, 0);
+    setup_gate(TRAP_simd_error, &simd_coprocessor_error, 0);
+    setup_gate(TRAP_xen_callback, hypervisor_callback, 0);
+
+    asm volatile ("lidt idt_ptr");
+
+    gdt[GDTE_TSS] = (typeof(*gdt))INIT_GDTE((unsigned long)&tss, 0x67, 0x89);
+    asm volatile ("ltr %w0" :: "rm" (GDTE_TSS * 8));
+
+    if ( hvm_set_parameter(HVM_PARAM_CALLBACK_IRQ,
+                           (2ULL << 56) | TRAP_xen_callback) )
+    {
+        xprintk("Request for Xen HVM callback vector failed\n");
+        do_exit();
+    }
+}
+
+void trap_fini(void)
+{
+}
+#endif
diff -Nru xen-4.9.0/extras/mini-os/arch/x86/x86_32.S xen-4.9.2/extras/mini-os/arch/x86/x86_32.S
--- xen-4.9.0/extras/mini-os/arch/x86/x86_32.S 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/arch/x86/x86_32.S 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,331 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_PARAVIRT
+
+#define KERNEL_DS __KERNEL_DS
+
+ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "Mini-OS")
+ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
+ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _WORD hypercall_page)
+ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
+ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
+.text
+
+.globl _start
+
+_start:
+        lss stack_start,%esp
+#else
+
+#define KERNEL_DS __KERN_DS
+
+#include "x86_hvm.S"
+        movl stack_start,%esp
+
+#endif
+        cld
+        andl $(~(__STACK_SIZE-1)), %esp
+        push %esi
+        call arch_init
+
+stack_start:
+        .long stack+(2*__STACK_SIZE), __KERNEL_SS
+
+.globl shared_info, hypercall_page
+        /* Unpleasant -- the PTE that maps this page is actually overwritten */
+        /* to map the real shared-info page! :-) */
+        .align __PAGE_SIZE
+shared_info:
+        .fill __PAGE_SIZE,1,0
+
+hypercall_page:
+        .fill __PAGE_SIZE,1,0
+
+ES = 0x20
+ORIG_EAX = 0x24
+EIP = 0x28
+CS = 0x2C
+
+#define ENTRY(X) .globl X ; X :
+
+#define SAVE_ALL \
+        cld; \
+        pushl %es; \
+        pushl %ds; \
+        pushl %eax; \
+        pushl %ebp; \
+        pushl %edi; \
+        pushl %esi; \
+        pushl %edx; \
+        pushl %ecx; \
+        pushl %ebx; \
+        movl $(KERNEL_DS),%edx; \
+        movl %edx,%ds; \
+        movl %edx,%es;
+
+#define RESTORE_ALL \
+        popl %ebx; \
+        popl %ecx; \
+        popl %edx; \
+        popl %esi; \
+        popl %edi; \
+        popl %ebp; \
+        popl %eax; \
+        popl %ds; \
+        popl %es; \
+        addl $4,%esp; \
+        iret;
+
+ENTRY(divide_error)
+        pushl $0 # no error code
+        pushl $do_divide_error
+do_exception:
+        pushl %ds
+        pushl %eax
+        xorl %eax, %eax
+        pushl %ebp
+        pushl %edi
+        pushl %esi
+        pushl %edx
+        decl %eax # eax = -1
+        pushl %ecx
+        pushl %ebx
+        cld
+        movl %es, %ecx
+        movl ES(%esp), %edi # get the function address
+        movl ORIG_EAX(%esp), %edx # get the error code
+        movl %eax, ORIG_EAX(%esp)
+        movl %ecx, ES(%esp)
+        movl $(KERNEL_DS), %ecx
+        movl %ecx, %ds
+        movl %ecx, %es
+        movl %esp,%eax # pt_regs pointer
+        pushl %edx
+        pushl %eax
+        call *%edi
+        jmp ret_from_exception
+
+ret_from_exception:
+        addl $8,%esp
+        RESTORE_ALL
+
+#ifdef CONFIG_PARAVIRT
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+        pushl %eax
+        SAVE_ALL
+        movl EIP(%esp),%eax
+        cmpl $scrit,%eax
+        jb 11f
+        cmpl $ecrit,%eax
+        jb critical_region_fixup
+11:     push %esp
+        xorl %ebp,%ebp
+        call do_hypervisor_callback
+        add $4,%esp
+        movl HYPERVISOR_shared_info,%esi
+        movb $0,1(%esi) # reenable event callbacks
+scrit:  /**** START OF CRITICAL REGION ****/
+        testb $0xFF,(%esi)
+        jnz 14f # process more events if necessary...
+        RESTORE_ALL
+14:     movb $1,1(%esi)
+        jmp 11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
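+# Editorial example: had the upcall arrived just before the 'pop %edx' of
+# RESTORE_ALL, the table below yields 0x08 -- %ebx and %ecx had already been
+# popped from the interrupted frame -- which tells the fixup how far the two
+# frames overlap before it merges them and restarts at 11b.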
+critical_region_fixup:
+        addl $critical_fixup_table-scrit,%eax
+        movzbl (%eax),%eax # %eax contains num bytes popped
+        mov %esp,%esi
+        add %eax,%esi # %esi points at end of src region
+        mov %esp,%edi
+        add $0x34,%edi # %edi points at end of dst region
+        mov %eax,%ecx
+        shr $2,%ecx # convert bytes to words
+        je 16f # skip loop if nothing to copy
+15:     subl $4,%esi # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b
+16:     movl %edi,%esp # final %edi is top of merged stack
+        jmp 11b
+
+critical_fixup_table:
+        .byte 0x00,0x00,0x00 # testb $0xff,(%esi)
+        .byte 0x00,0x00 # jne 14f
+        .byte 0x00 # pop %ebx
+        .byte 0x04 # pop %ecx
+        .byte 0x08 # pop %edx
+        .byte 0x0c # pop %esi
+        .byte 0x10 # pop %edi
+        .byte 0x14 # pop %ebp
+        .byte 0x18 # pop %eax
+        .byte 0x1c # pop %ds
+        .byte 0x20 # pop %es
+        .byte 0x24,0x24,0x24 # add $4,%esp
+        .byte 0x28 # iret
+        .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi)
+        .byte 0x00,0x00 # jmp 11b
+
+#else
+
+ENTRY(hypervisor_callback)
+        pushl $0
+        pushl $do_hypervisor_callback
+        jmp do_exception
+
+#endif
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+        pop %ds
+        pop %es
+        pop %fs
+        pop %gs
+        iret
+
+ENTRY(coprocessor_error)
+        pushl $0
+        pushl $do_coprocessor_error
+        jmp do_exception
+
+ENTRY(simd_coprocessor_error)
+        pushl $0
+        pushl $do_simd_coprocessor_error
+        jmp do_exception
+
+ENTRY(device_not_available)
+        iret
+
+ENTRY(debug)
+        pushl $0
+        pushl $do_debug
+        jmp do_exception
+
+ENTRY(int3)
+        pushl $0
+        pushl $do_int3
+        jmp do_exception
+
+ENTRY(overflow)
+        pushl $0
+        pushl $do_overflow
+        jmp do_exception
+
+ENTRY(bounds)
+        pushl $0
+        pushl $do_bounds
+        jmp do_exception
+
+ENTRY(invalid_op)
+        pushl $0
+        pushl $do_invalid_op
+        jmp do_exception
+
+
+ENTRY(coprocessor_segment_overrun)
+        pushl $0
+        pushl $do_coprocessor_segment_overrun
+        jmp do_exception
+
+
+ENTRY(invalid_TSS)
+        pushl $do_invalid_TSS
+        jmp do_exception
+
+
+ENTRY(segment_not_present)
+        pushl $do_segment_not_present
+        jmp do_exception
+
+
+ENTRY(stack_segment)
+        pushl $do_stack_segment
+        jmp do_exception
+
+
+ENTRY(general_protection)
+        pushl $do_general_protection
+        jmp do_exception
+
+
+ENTRY(alignment_check)
+        pushl $do_alignment_check
+        jmp do_exception
+
+
+ENTRY(page_fault)
+        pushl $do_page_fault
+        jmp do_exception
+
+ENTRY(machine_check)
+        pushl $0
+        pushl $do_machine_check
+        jmp do_exception
+
+
+ENTRY(spurious_interrupt_bug)
+        pushl $0
+        pushl $do_spurious_interrupt_bug
+        jmp do_exception
+
+
+
+ENTRY(thread_starter)
+        popl %eax
+        popl %ebx
+        pushl $0
+        xorl %ebp,%ebp
+        pushl %eax
+        call *%ebx
+        call exit_thread
+
+ENTRY(__arch_switch_threads)
+        movl 4(%esp), %ecx /* prev */
+        movl 8(%esp), %edx /* next */
+        pushl %ebp
+        pushl %ebx
+        pushl %esi
+        pushl %edi
+        movl %esp, (%ecx) /* save ESP */
+        movl (%edx), %esp /* restore ESP */
+        movl $1f, 4(%ecx) /* save EIP */
+        pushl 4(%edx) /* restore EIP */
+        ret
+1:
+        popl %edi
+        popl %esi
+        popl %ebx
+        popl %ebp
+        ret
+
+#ifndef CONFIG_PARAVIRT
+.data
+.globl page_table_base
+        .align __PAGE_SIZE
+page_table_base:
+        PTE(page_table_l2 + L3_PROT)
+        .align __PAGE_SIZE, 0
+#endif
diff -Nru xen-4.9.0/extras/mini-os/arch/x86/x86_64.S xen-4.9.2/extras/mini-os/arch/x86/x86_64.S
--- xen-4.9.0/extras/mini-os/arch/x86/x86_64.S 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/arch/x86/x86_64.S 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,405 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define ENTRY(X) .globl X ; X :
+
+#ifdef CONFIG_PARAVIRT +ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "Mini-OS") +ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") +ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _WORD hypercall_page) +ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") +.text + +.globl _start + +_start: +#else + +#include "x86_hvm.S" + +#endif + cld + movq stack_start(%rip),%rsp + andq $(~(__STACK_SIZE-1)), %rsp + movq %rsi,%rdi + call arch_init + +stack_start: + .quad stack+(2*__STACK_SIZE) + +.globl shared_info, hypercall_page + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .align __PAGE_SIZE +shared_info: + .fill __PAGE_SIZE,1,0 + +hypercall_page: + .fill __PAGE_SIZE,1,0 + + +#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg +#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) +#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) +#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) + +/* Offsets into shared_info_t. */ +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 1 + +NMI_MASK = 0x80000000 +KERNEL_CS_MASK = 0xfc + +#define RAX 80 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ +#define RIP 128 +#define CS 136 +#define RFLAGS 144 +#define RSP 152 + + +/* Macros */ +.macro SAVE_PARAVIRT +#ifdef CONFIG_PARAVIRT + pop %rcx + pop %r11 /* rsp points to the error code */ +#endif +.endm + +.macro zeroentry sym + SAVE_PARAVIRT + pushq $0 /* push error code/oldrax */ + pushq %rax /* push real oldrax to the rdi slot */ + leaq \sym(%rip),%rax + jmp error_entry +.endm + +.macro errorentry sym + SAVE_PARAVIRT + pushq %rax + leaq \sym(%rip),%rax + jmp error_entry +.endm + +.macro RESTORE_ALL + movq (%rsp),%r15 + movq 1*8(%rsp),%r14 + movq 2*8(%rsp),%r13 + movq 3*8(%rsp),%r12 + movq 4*8(%rsp),%rbp + movq 5*8(%rsp),%rbx + movq 6*8(%rsp),%r11 + movq 7*8(%rsp),%r10 + movq 8*8(%rsp),%r9 + movq 9*8(%rsp),%r8 + movq 10*8(%rsp),%rax + movq 11*8(%rsp),%rcx + movq 12*8(%rsp),%rdx + movq 13*8(%rsp),%rsi + movq 14*8(%rsp),%rdi + addq $15*8+8,%rsp +.endm + +.macro SAVE_ALL + /* rdi slot contains rax, oldrax contains error code */ + cld + subq $14*8,%rsp + movq %rsi,13*8(%rsp) + movq 14*8(%rsp),%rsi /* load rax from rdi slot */ + movq %rdx,12*8(%rsp) + movq %rcx,11*8(%rsp) + movq %rsi,10*8(%rsp) /* store rax */ + movq %r8, 9*8(%rsp) + movq %r9, 8*8(%rsp) + movq %r10,7*8(%rsp) + movq %r11,6*8(%rsp) + movq %rbx,5*8(%rsp) + movq %rbp,4*8(%rsp) + movq %r12,3*8(%rsp) + movq %r13,2*8(%rsp) + movq %r14,1*8(%rsp) + movq %r15,(%rsp) + movq %rdi, RDI(%rsp) /* put rdi into the slot */ +.endm + +.macro HYPERVISOR_IRET +#ifdef CONFIG_PARAVIRT + testl $NMI_MASK,2*8(%rsp) + jnz 2f + + /* Direct iret to kernel space. Correct CS and SS. */ + orb $3,1*8(%rsp) + orb $3,4*8(%rsp) +#endif + iretq + +#ifdef CONFIG_PARAVIRT +2: /* Slow iret via hypervisor. */ + andl $~NMI_MASK, 16(%rsp) + pushq $0 + jmp hypercall_page + (__HYPERVISOR_iret * 32) +#endif +.endm + + +/* + * Exception entry point. This expects an error code/orig_rax on the stack + * and the exception handler in %rax. + */ +error_entry: + SAVE_ALL + + movq %rsp,%rdi + movq ORIG_RAX(%rsp),%rsi # get error code + movq $-1,ORIG_RAX(%rsp) + call *%rax + jmp error_exit + + +#ifdef CONFIG_PARAVIRT +/* + * Xen event (virtual interrupt) entry point. 
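+ * (Editorial note: entered via zeroentry since Xen pushes no hardware
+ * error code; hypervisor_callback2 below bumps the nesting count at %gs:0
+ * and switches %rsp to the stack saved at %gs:8 only on the outermost
+ * entry, so nested upcalls keep running on the current stack.)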
+ */ +ENTRY(hypervisor_callback) + zeroentry hypervisor_callback2 + +hypervisor_callback2: + movq %rdi, %rsp + + /* check against event re-entrant */ + movq RIP(%rsp),%rax + cmpq $scrit,%rax + jb 11f + cmpq $ecrit,%rax + jb critical_region_fixup + +11: movq %gs:8,%rax + incl %gs:0 + cmovzq %rax,%rsp + pushq %rdi + call do_hypervisor_callback + popq %rsp + decl %gs:0 + +error_exit: + movl RFLAGS(%rsp), %eax + shr $9, %eax # EAX[0] == IRET_RFLAGS.IF + XEN_GET_VCPU_INFO(%rsi) + andb evtchn_upcall_mask(%rsi),%al + andb $1,%al # EAX[0] == IRET_RFLAGS.IF & event_mask + jnz restore_all_enable_events # != 0 => enable event delivery + + RESTORE_ALL + HYPERVISOR_IRET + +restore_all_enable_events: + RESTORE_ALL + pushq %rax # save rax for it will be clobbered later + RSP_OFFSET=8 # record the stack frame layout changes + XEN_GET_VCPU_INFO(%rax) # safe to use rax since it is saved + XEN_LOCKED_UNBLOCK_EVENTS(%rax) + +scrit: /**** START OF CRITICAL REGION ****/ + XEN_TEST_PENDING(%rax) + jz 12f + XEN_LOCKED_BLOCK_EVENTS(%rax) # if pending, mask events and handle + # by jumping to hypervisor_prologue +12: popq %rax # all registers restored from this point + +restore_end: + jnz hypervisor_prologue # safe to jump out of critical region + # because events are masked if ZF = 0 + HYPERVISOR_IRET +ecrit: /**** END OF CRITICAL REGION ****/ + +# Set up the stack as Xen does before calling event callback +hypervisor_prologue: + pushq %r11 + pushq %rcx + jmp hypervisor_callback + +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so if rax has been +# restored. We determine by comparing interrupted rip with "restore_end". +# We always copy all registers below RIP from the current stack frame +# to the end of the previous activation frame so that we can continue +# as if we've never even reached 11 running in the old activation frame. + +critical_region_fixup: + # Set up source and destination region pointers + leaq RIP(%rsp),%rsi # esi points at end of src region + # Acquire interrupted rsp which was saved-on-stack. This points to + # the end of dst region. Note that it is not necessarily current rsp + # plus 0xb0, because the second interrupt might align the stack frame. + movq RSP(%rsp),%rdi # edi points at end of dst region + + cmpq $restore_end,%rax + jae 13f + + # If interrupted rip is before restore_end + # then rax hasn't been restored yet + movq (%rdi),%rax + movq %rax, RAX(%rsp) # save rax + addq $RSP_OFFSET,%rdi + + # Set up the copy +13: movq $RIP,%rcx + shr $3,%rcx # convert bytes into count of 64-bit entities +15: subq $8,%rsi # pre-decrementing copy loop + subq $8,%rdi + movq (%rsi),%rax + movq %rax,(%rdi) + loop 15b +16: movq %rdi,%rsp # final rdi is top of merged stack + andb $KERNEL_CS_MASK,CS(%rsp) # CS might have changed + jmp 11b + +#else +error_exit: + RESTORE_ALL + HYPERVISOR_IRET + +/* + * Xen event (virtual interrupt) entry point. 
+ */ +ENTRY(hypervisor_callback) + zeroentry do_hypervisor_callback + + +#endif + +ENTRY(failsafe_callback) +#ifdef CONFIG_PARAVIRT + popq %rcx + popq %r11 +#endif + iretq + + +ENTRY(coprocessor_error) + zeroentry do_coprocessor_error + + +ENTRY(simd_coprocessor_error) + zeroentry do_simd_coprocessor_error + + +ENTRY(device_not_available) + zeroentry do_device_not_available + + +ENTRY(debug) + zeroentry do_debug + + +ENTRY(int3) + zeroentry do_int3 + +ENTRY(overflow) + zeroentry do_overflow + + +ENTRY(bounds) + zeroentry do_bounds + + +ENTRY(invalid_op) + zeroentry do_invalid_op + + +ENTRY(coprocessor_segment_overrun) + zeroentry do_coprocessor_segment_overrun + + +ENTRY(invalid_TSS) + errorentry do_invalid_TSS + + +ENTRY(segment_not_present) + errorentry do_segment_not_present + + +/* runs on exception stack */ +ENTRY(stack_segment) + errorentry do_stack_segment + + +ENTRY(general_protection) + errorentry do_general_protection + + +ENTRY(alignment_check) + errorentry do_alignment_check + + +ENTRY(divide_error) + zeroentry do_divide_error + + +ENTRY(spurious_interrupt_bug) + zeroentry do_spurious_interrupt_bug + + +ENTRY(page_fault) + errorentry do_page_fault + + + + + +ENTRY(thread_starter) + popq %rdi + popq %rbx + pushq $0 + xorq %rbp,%rbp + call *%rbx + call exit_thread + + +ENTRY(__arch_switch_threads) + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp, (%rdi) /* save ESP */ + movq (%rsi), %rsp /* restore ESP */ + movq $1f, 8(%rdi) /* save EIP */ + pushq 8(%rsi) /* restore EIP */ + ret +1: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + ret + +#ifndef CONFIG_PARAVIRT +.data +.globl page_table_base + .align __PAGE_SIZE +page_table_virt_l2: + PTE(page_table_virt_l1 + L2_PROT) + .align __PAGE_SIZE, 0 +page_table_virt_l3: + PTE(page_table_virt_l2 + L3_PROT) + .align __PAGE_SIZE, 0 +page_table_l3: + PTE(page_table_l2 + L3_PROT) + .align __PAGE_SIZE, 0 +page_table_base: + PTE(page_table_l3 + L4_PROT) + PTE(page_table_virt_l3 + L4_PROT) + .align __PAGE_SIZE, 0 +#endif diff -Nru xen-4.9.0/extras/mini-os/arch/x86/x86_hvm.S xen-4.9.2/extras/mini-os/arch/x86/x86_hvm.S --- xen-4.9.0/extras/mini-os/arch/x86/x86_hvm.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/arch/x86/x86_hvm.S 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,88 @@ +/* Included by x86_[32|64].S */ + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long _start) +.text + .code32 /* Always starts in 32bit flat mode. */ + +.globl _start + +_start: + mov $(X86_CR4_PAE | X86_CR4_OSFXSR), %eax + mov %eax, %cr4 + mov $page_table_base, %eax + mov %eax, %cr3 + +#ifdef __x86_64__ /* EFER.LME = 1 */ + mov $MSR_EFER, %ecx + rdmsr + bts $_EFER_LME, %eax + wrmsr +#endif /* __x86_64__ */ + + mov %cr0, %eax + or $X86_CR0_PG, %eax + mov %eax, %cr0 + + lgdt gdt_ptr + + /* Load code segment. */ + ljmp $__KERN_CS, $1f +#ifdef __x86_64__ + .code64 +#endif + + /* Load data segments. */ +1: + mov $__USER_DS, %eax + mov %eax, %ds + mov %eax, %es + mov %eax, %fs + mov %eax, %gs + mov $__KERN_DS, %eax + mov %eax, %ss + + mov %ebx, %esi + +.data +/* + * Macro to create a sequence of page table entries. + * As a loop can be done via recursion only and the nesting level is limited + * we treat the first 32 PTEs in a special way limiting nesting level to 64 + * in case of a complete page table (512 PTEs) to be filled. 
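+ * (Editorial note: for a full 512-entry table the macro peels off 32 PTEs
+ * at a time and expands each group of 32 linearly, which is what bounds
+ * the nesting depth at the 64 levels mentioned above.)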
+ * prot: protection bits in all PTEs + * addr: physical address of the area to map + * incr: increment of address for each PTE + * idx: index of first PTE in page table + * end: index of last PTE in page table + 1 + */ + .macro PTES prot, addr, incr, idx, end + .ifgt \end-\idx-32 + PTES \prot, \addr, \incr, \idx, "(\idx+32)" + PTES \prot, "(\addr+32*\incr)", \incr, "(\idx+32)", \end + .else + PTE(\addr + \prot) + .if \end-\idx-1 + PTES \prot, "(\addr+\incr)", \incr, "(\idx+1)", \end + .endif + .endif + .endm + .align __PAGE_SIZE +page_table_virt_l1: + PTE(0) + .align __PAGE_SIZE, 0 +page_table_l1: + PTES L1_PROT, 0x00000000, 0x00001000, 0, L1_PAGETABLE_ENTRIES + .align __PAGE_SIZE, 0 +page_table_l2: + /* Map the first 1GB of memory (on 32 bit 16MB less). */ + PTE(page_table_l1 + L2_PROT) +#ifdef __x86_64__ + PTES L2_PROT|_PAGE_PSE, 0x00200000, 0x00200000, 1, L2_PAGETABLE_ENTRIES +#else + /* At 3f000000 virtual kernel area is starting. */ + PTES L2_PROT|_PAGE_PSE, 0x00200000, 0x00200000, 1, l2_table_offset(VIRT_KERNEL_AREA) + PTE(page_table_virt_l1 + L2_PROT) +#endif + .align __PAGE_SIZE, 0 + +.text diff -Nru xen-4.9.0/extras/mini-os/balloon.c xen-4.9.2/extras/mini-os/balloon.c --- xen-4.9.0/extras/mini-os/balloon.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/balloon.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,158 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned long nr_max_pages; +unsigned long nr_mem_pages; + +void get_max_pages(void) +{ + long ret; + domid_t domid = DOMID_SELF; + + ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); + if ( ret < 0 ) + { + printk("Could not get maximum pfn\n"); + return; + } + + nr_max_pages = ret; + printk("Maximum memory size: %ld pages\n", nr_max_pages); +} + +void mm_alloc_bitmap_remap(void) +{ + unsigned long i, new_bitmap; + + if ( mm_alloc_bitmap_size >= ((nr_max_pages + 1) >> 3) ) + return; + + new_bitmap = alloc_virt_kernel(PFN_UP((nr_max_pages + 1) >> 3)); + for ( i = 0; i < mm_alloc_bitmap_size; i += PAGE_SIZE ) + { + map_frame_rw(new_bitmap + i, + virt_to_mfn((unsigned long)(mm_alloc_bitmap) + i)); + } + + mm_alloc_bitmap = (unsigned long *)new_bitmap; +} + +#define N_BALLOON_FRAMES 64 +static unsigned long balloon_frames[N_BALLOON_FRAMES]; + +int balloon_up(unsigned long n_pages) +{ + unsigned long page, pfn; + int rc; + struct xen_memory_reservation reservation = { + .domid = DOMID_SELF + }; + + if ( n_pages > nr_max_pages - nr_mem_pages ) + n_pages = nr_max_pages - nr_mem_pages; + if ( n_pages > N_BALLOON_FRAMES ) + n_pages = N_BALLOON_FRAMES; + + /* Resize alloc_bitmap if necessary. */ + while ( mm_alloc_bitmap_size * 8 < nr_mem_pages + n_pages ) + { + page = alloc_page(); + if ( !page ) + return -ENOMEM; + + memset((void *)page, ~0, PAGE_SIZE); + if ( map_frame_rw((unsigned long)mm_alloc_bitmap + mm_alloc_bitmap_size, + virt_to_mfn(page)) ) + { + free_page((void *)page); + return -ENOMEM; + } + + mm_alloc_bitmap_size += PAGE_SIZE; + } + + rc = arch_expand_p2m(nr_mem_pages + n_pages); + if ( rc ) + return rc; + + /* Get new memory from hypervisor. */ + for ( pfn = 0; pfn < n_pages; pfn++ ) + { + balloon_frames[pfn] = nr_mem_pages + pfn; + } + set_xen_guest_handle(reservation.extent_start, balloon_frames); + reservation.nr_extents = n_pages; + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + if ( rc <= 0 ) + return rc; + + for ( pfn = 0; pfn < rc; pfn++ ) + { + arch_pfn_add(nr_mem_pages + pfn, balloon_frames[pfn]); + free_page(pfn_to_virt(nr_mem_pages + pfn)); + } + + nr_mem_pages += rc; + + return rc; +} + +static int in_balloon; + +int chk_free_pages(unsigned long needed) +{ + unsigned long n_pages; + + /* No need for ballooning if plenty of space available. */ + if ( needed + BALLOON_EMERGENCY_PAGES <= nr_free_pages ) + return 1; + + /* If we are already ballooning up just hope for the best. */ + if ( in_balloon ) + return 1; + + /* Interrupts disabled can't be handled right now. */ + if ( irqs_disabled() ) + return 1; + + in_balloon = 1; + + while ( needed + BALLOON_EMERGENCY_PAGES > nr_free_pages ) + { + n_pages = needed + BALLOON_EMERGENCY_PAGES - nr_free_pages; + if ( !balloon_up(n_pages) ) + break; + } + + in_balloon = 0; + + return needed <= nr_free_pages; +} diff -Nru xen-4.9.0/extras/mini-os/blkfront.c xen-4.9.2/extras/mini-os/blkfront.c --- xen-4.9.0/extras/mini-os/blkfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/blkfront.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,732 @@ +/* Minimal block driver for Mini-OS. + * Copyright (c) 2007-2008 Samuel Thibault. + * Based on netfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Note: we generally don't need to disable IRQs since we hardly do anything in + * the interrupt handler. 
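+ * (Editor's illustration, not part of the upstream patch: the interrupt
+ *  handler below really does almost nothing,
+ *      files[fd].read = 1;
+ *      wake_up(&blkfront_queue);
+ *  all completion processing happens later in blkfront_aio_poll(),
+ *  called from thread context.)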
*/ + +/* Note: we really suppose non-preemptive threads. */ + +DECLARE_WAIT_QUEUE_HEAD(blkfront_queue); + + + + +#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define GRANT_INVALID_REF 0 + + +struct blk_buffer { + void* page; + grant_ref_t gref; +}; + +struct blkfront_dev { + domid_t dom; + + struct blkif_front_ring ring; + grant_ref_t ring_ref; + evtchn_port_t evtchn; + blkif_vdev_t handle; + + char *nodename; + char *backend; + struct blkfront_info info; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct blkfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&blkfront_queue); +} + +static void free_blkfront(struct blkfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + gnttab_end_access(dev->ring_ref); + free_page(dev->ring.sring); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); + free(dev); +} + +struct blkfront_dev *init_blkfront(char *_nodename, struct blkfront_info *info) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct blkif_sring *s; + int retry=0; + char* msg = NULL; + char* c; + char* nodename = _nodename ? _nodename : "device/vbd/768"; + + struct blkfront_dev *dev; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + printk("******************* BLKFRONT for %s **********\n\n\n", nodename); + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn); + + s = (struct blkif_sring*) alloc_page(); + memset(s,0,PAGE_SIZE); + + + SHARED_RING_INIT(s); + FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE); + + dev->ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(s),0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "ring-ref","%u", + dev->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + dev->handle = strtoul(strrchr(nodename, '/')+1, NULL, 0); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/feature-flush-cache") + 1]; + snprintf(path, 
sizeof(path), "%s/mode", dev->backend); + msg = xenbus_read(XBT_NIL, path, &c); + if (msg) { + printk("Error %s when reading the mode\n", msg); + goto error; + } + if (*c == 'w') + dev->info.mode = O_RDWR; + else + dev->info.mode = O_RDONLY; + free(c); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + msg = NULL; + state = xenbus_read_integer(path); + while (msg == NULL && state < XenbusStateConnected) + msg = xenbus_wait_for_state_change(path, &state, &dev->events); + if (msg != NULL || state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + snprintf(path, sizeof(path), "%s/info", dev->backend); + dev->info.info = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sectors", dev->backend); + // FIXME: read_integer returns an int, so disk size limited to 1TB for now + dev->info.sectors = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sector-size", dev->backend); + dev->info.sector_size = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend); + dev->info.barrier = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend); + dev->info.flush = xenbus_read_integer(path); + + *info = dev->info; + } + unmask_evtchn(dev->evtchn); + + printk("%lu sectors of %u bytes\n", (unsigned long) dev->info.sectors, dev->info.sector_size); + printk("**************************\n"); + + return dev; + +error: + free(msg); + free(err); + free_blkfront(dev); + return NULL; +} + +void shutdown_blkfront(struct blkfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1]; + + blkfront_sync(dev); + + printk("close blk: backend=%s node=%s\n", dev->backend, dev->nodename); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = 
xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_blkfront(dev); +} + +static void blkfront_wait_slot(struct blkfront_dev *dev) +{ + /* Wait for a slot */ + if (RING_FULL(&dev->ring)) { + unsigned long flags; + DEFINE_WAIT(w); + local_irq_save(flags); + while (1) { + blkfront_aio_poll(dev); + if (!RING_FULL(&dev->ring)) + break; + /* Really no slot, go to sleep. */ + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); + } +} + +/* Issue an aio */ +void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + struct blkif_request *req; + RING_IDX i; + int notify; + int n, j; + uintptr_t start, end; + + // Can't io at non-sector-aligned location + ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1))); + // Can't io non-sector-sized amounts + ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1))); + // Can't io non-sector-aligned buffer + ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1))); + + start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK; + end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK; + aiocbp->n = n = (end - start) / PAGE_SIZE; + + /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB, + * so max 44KB can't happen */ + ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST); + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + + req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; + req->nr_segments = n; + req->handle = dev->handle; + req->id = (uintptr_t) aiocbp; + req->sector_number = aiocbp->aio_offset / 512; + + for (j = 0; j < n; j++) { + req->seg[j].first_sect = 0; + req->seg[j].last_sect = PAGE_SIZE / 512 - 1; + } + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512; + req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512; + for (j = 0; j < n; j++) { + uintptr_t data = start + j * PAGE_SIZE; + if (!write) { + /* Trigger CoW if needed */ + *(char*)(data + (req->seg[j].first_sect << 9)) = 0; + barrier(); + } + aiocbp->gref[j] = req->seg[j].gref = + gnttab_grant_access(dev->dom, virtual_to_mfn(data), write); + } + + dev->ring.req_prod_pvt = i + 1; + + wmb(); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + + if(notify) notify_remote_via_evtchn(dev->evtchn); +} + +static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) +{ + aiocbp->data = (void*) 1; + aiocbp->aio_cb = NULL; +} + +void blkfront_io(struct blkfront_aiocb *aiocbp, int write) +{ + unsigned long flags; + DEFINE_WAIT(w); + + ASSERT(!aiocbp->aio_cb); + aiocbp->aio_cb = blkfront_aio_cb; + blkfront_aio(aiocbp, write); + aiocbp->data = NULL; + + local_irq_save(flags); + while (1) { + blkfront_aio_poll(aiocbp->aio_dev); + if (aiocbp->data) + break; + + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); +} + +static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id) +{ + int i; + struct blkif_request *req; + int notify; + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + req->operation = op; + req->nr_segments = 0; + req->handle = dev->handle; + req->id = id; + /* Not needed anyway, but the backend will check it */ + req->sector_number = 0; + 
dev->ring.req_prod_pvt = i + 1; + wmb(); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + if (notify) notify_remote_via_evtchn(dev->evtchn); +} + +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + blkfront_push_operation(dev, op, (uintptr_t) aiocbp); +} + +void blkfront_sync(struct blkfront_dev *dev) +{ + unsigned long flags; + DEFINE_WAIT(w); + + if (dev->info.mode == O_RDWR) { + if (dev->info.barrier == 1) + blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0); + + if (dev->info.flush == 1) + blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0); + } + + /* Note: This won't finish if another thread enqueues requests. */ + local_irq_save(flags); + while (1) { + blkfront_aio_poll(dev); + if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring)) + break; + + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); +} + +int blkfront_aio_poll(struct blkfront_dev *dev) +{ + RING_IDX rp, cons; + struct blkif_response *rsp; + int more; + int nr_consumed; + +moretodo: +#ifdef HAVE_LIBC + if (dev->fd != -1) { + files[dev->fd].read = 0; + mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ + } +#endif + + rp = dev->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + cons = dev->ring.rsp_cons; + + nr_consumed = 0; + while ((cons != rp)) + { + struct blkfront_aiocb *aiocbp; + int status; + + rsp = RING_GET_RESPONSE(&dev->ring, cons); + nr_consumed++; + + aiocbp = (void*) (uintptr_t) rsp->id; + status = rsp->status; + + switch (rsp->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + { + int j; + + if (status != BLKIF_RSP_OKAY) + printk("%s error %d on %s at offset %llu, num bytes %llu\n", + rsp->operation == BLKIF_OP_READ?"read":"write", + status, aiocbp->aio_dev->nodename, + (unsigned long long) aiocbp->aio_offset, + (unsigned long long) aiocbp->aio_nbytes); + + for (j = 0; j < aiocbp->n; j++) + gnttab_end_access(aiocbp->gref[j]); + + break; + } + + case BLKIF_OP_WRITE_BARRIER: + if (status != BLKIF_RSP_OKAY) + printk("write barrier error %d\n", status); + break; + case BLKIF_OP_FLUSH_DISKCACHE: + if (status != BLKIF_RSP_OKAY) + printk("flush error %d\n", status); + break; + + default: + printk("unrecognized block operation %d response (status %d)\n", rsp->operation, status); + break; + } + + dev->ring.rsp_cons = ++cons; + /* Nota: callback frees aiocbp itself */ + if (aiocbp && aiocbp->aio_cb) + aiocbp->aio_cb(aiocbp, status ? 
-EIO : 0); + if (dev->ring.rsp_cons != cons) + /* We reentered, we must not continue here */ + break; + } + + RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); + if (more) goto moretodo; + + return nr_consumed; +} + +#ifdef HAVE_LIBC +int blkfront_open(struct blkfront_dev *dev) +{ + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } + dev->fd = alloc_fd(FTYPE_BLK); + printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].blk.dev = dev; + files[dev->fd].blk.offset = 0; + return dev->fd; +} + +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + off_t offset = files[fd].blk.offset; + struct blkfront_aiocb aiocb; + unsigned long long disksize = dev->info.sectors * dev->info.sector_size; + unsigned int blocksize = dev->info.sector_size; + + int blknum; + int blkoff; + size_t bytes; + int rc = 0; + int alignedbuf = 0; + uint8_t* copybuf = NULL; + + /* RW 0 bytes is just a NOP */ + if(count == 0) { + return 0; + } + /* Check for NULL buffer */ + if( buf == NULL ) { + errno = EFAULT; + return -1; + } + + /* Write mode checks */ + if(write) { + /*Make sure we have write permission */ + if(dev->info.info & VDISK_READONLY + || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) { + errno = EACCES; + return -1; + } + /*Make sure disk is big enough for this write */ + if(offset + count > disksize) { + errno = ENOSPC; + return -1; + } + } + /* Read mode checks */ + else + { + /* Reading past the disk? Just return 0 */ + if(offset >= disksize) { + return 0; + } + + /*If the requested read is bigger than the disk, just + * read as much as we can until the end */ + if(offset + count > disksize) { + count = disksize - offset; + } + } + /* Determine which block to start at and at which offset inside of it */ + blknum = offset / blocksize; + blkoff = offset % blocksize; + + /* Optimization: We need to check if buf is aligned to the sector size. + * This is somewhat tricky code. We have to add the blocksize - block offset + * because the first block may be a partial block and then for every subsequent + * block rw the buffer will be offset.*/ + if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) { + alignedbuf = 1; + } + + /* Setup aiocb block object */ + aiocb.aio_dev = dev; + aiocb.aio_offset = blknum * blocksize; + aiocb.aio_cb = NULL; + aiocb.data = NULL; + + /* If our buffer is unaligned or its aligned but we will need to rw a partial block + * then a copy will have to be done */ + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { + copybuf = _xmalloc(blocksize, dev->info.sector_size); + } + + rc = count; + while(count > 0) { + /* determine how many bytes to read/write from/to the current block buffer */ + if(!alignedbuf || blkoff != 0 || count < blocksize) { + /* This is the case for unaligned R/W or partial block */ + bytes = count < blocksize - blkoff ? count : blocksize - blkoff; + aiocb.aio_nbytes = blocksize; + } else { + /* We can optimize further if buffer is page aligned */ + int not_page_aligned = 0; + if(((uintptr_t)buf) & (PAGE_SIZE -1)) { + not_page_aligned = 1; + } + + /* For an aligned R/W we can read up to the maximum transfer size */ + bytes = count > (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + ? 
(BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + : count & ~(blocksize -1); + aiocb.aio_nbytes = bytes; + } + + /* read operation */ + if(!write) { + if (alignedbuf && bytes >= blocksize) { + /* If aligned and we're reading a whole block, just read right into buf */ + aiocb.aio_buf = buf; + blkfront_read(&aiocb); + } else { + /* If not then we have to do a copy */ + aiocb.aio_buf = copybuf; + blkfront_read(&aiocb); + memcpy(buf, &copybuf[blkoff], bytes); + } + } + /* Write operation */ + else { + if(alignedbuf && bytes >= blocksize) { + /* If aligned and we're writing a whole block, just write directly from buf */ + aiocb.aio_buf = buf; + blkfront_write(&aiocb); + } else { + /* If not then we have to do a copy. */ + aiocb.aio_buf = copybuf; + /* If we're writing a partial block, we need to read the current contents first + * so we don't overwrite the extra bits with garbage */ + if(blkoff != 0 || bytes < blocksize) { + blkfront_read(&aiocb); + } + memcpy(&copybuf[blkoff], buf, bytes); + blkfront_write(&aiocb); + } + } + /* Will start at beginning of all remaining blocks */ + blkoff = 0; + + /* Increment counters and continue */ + count -= bytes; + buf += bytes; + if(bytes < blocksize) { + //At minimum we read one block + aiocb.aio_offset += blocksize; + } else { + //If we read more than a block, it was a multiple of blocksize + aiocb.aio_offset += bytes; + } + } + + free(copybuf); + files[fd].blk.offset += rc; + return rc; + +} + +int blkfront_posix_fstat(int fd, struct stat* buf) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + + buf->st_mode = dev->info.mode; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->info.sectors * dev->info.sector_size; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} +#endif diff -Nru xen-4.9.0/extras/mini-os/Config.mk xen-4.9.2/extras/mini-os/Config.mk --- xen-4.9.0/extras/mini-os/Config.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/Config.mk 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,205 @@ +# +# Compare $(1) and $(2) and replace $(2) with $(1) if they differ +# +# Typically $(1) is a newly generated file and $(2) is the target file +# being regenerated. This prevents changing the timestamp of $(2) only +# due to being auto-regenerated with the same contents. +define move-if-changed + if ! cmp -s $(1) $(2); then mv -f $(1) $(2); else rm -f $(1); fi +endef + +# cc-option: Check if compiler supports first option, else fall back to second. +# +# This is complicated by the fact that unrecognised -Wno-* options: +# (a) are ignored unless the compilation emits a warning; and +# (b) even then produce a warning rather than an error +# To handle this we do a test compile, passing the option-under-test, on a code +# fragment that will always produce a warning (integer assigned to pointer). +# We then grep for the option-under-test in the compiler's output, the presence +# of which would indicate an "unrecognized command-line option" warning/error. +# +# Usage: cflags-y += $(call cc-option,$(CC),-march=winchip-c6,-march=i586) +cc-option = $(shell if test -z "`echo 'void*p=1;' | \ + $(1) $(2) -S -o /dev/null -x c - 2>&1 | grep -- $(2) -`"; \ + then echo "$(2)"; else echo "$(3)"; fi ;) + +ifneq ($(MINIOS_CONFIG),) +EXTRA_DEPS += $(MINIOS_CONFIG) +include $(MINIOS_CONFIG) +endif + +# Compatibility with Xen's stubdom build environment. If we are building +# stubdom, some XEN_ variables are set; set MINIOS_ variables accordingly.
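+# (Editor's note, not part of the upstream patch: a stubdom build enters
+# this file with XEN_ROOT and XEN_TARGET_ARCH already set by the Xen
+# build system; a standalone Mini-OS build falls back to TOPLEVEL_DIR
+# and the "uname -m" probe below.)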
+# +ifneq ($(XEN_ROOT),) +MINIOS_ROOT=$(XEN_ROOT)/extras/mini-os +else +MINIOS_ROOT=$(TOPLEVEL_DIR) +endif +export MINIOS_ROOT + +ifneq ($(XEN_TARGET_ARCH),) +MINIOS_TARGET_ARCH = $(XEN_TARGET_ARCH) +else +MINIOS_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ + -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \ + -e s/armv7.*/arm32/ -e s/armv8.*/arm64/ \ + -e s/aarch64/arm64/) + +MINIOS_TARGET_ARCH ?= $(MINIOS_COMPILE_ARCH) +endif + +libc = $(stubdom) + +XEN_INTERFACE_VERSION ?= 0x00030205 +export XEN_INTERFACE_VERSION + +# Try to find out the architecture family TARGET_ARCH_FAM. +# First check whether x86_... is contained (for x86_32, x86_32y, x86_64). +# If not x86 then use $(MINIOS_TARGET_ARCH) +ifeq ($(findstring x86_,$(MINIOS_TARGET_ARCH)),x86_) +TARGET_ARCH_FAM = x86 +else +TARGET_ARCH_FAM = $(MINIOS_TARGET_ARCH) +endif + +# The architecture family directory below mini-os. +TARGET_ARCH_DIR := arch/$(TARGET_ARCH_FAM) + +# Export these variables for possible use in architecture dependent makefiles. +export TARGET_ARCH_DIR +export TARGET_ARCH_FAM + +# This is used for architecture specific links. +# This can be overwritten from arch specific rules. +ARCH_LINKS = + +# The path pointing to the architecture specific header files. +ARCH_INC := $(TARGET_ARCH_FAM) + +# For possible special header directories. +# This can be overwritten from arch specific rules. +EXTRA_INC = $(ARCH_INC) + +# Include the architecture family's special makerules. +# This must be before include minios.mk! +include $(MINIOS_ROOT)/$(TARGET_ARCH_DIR)/arch.mk + +extra_incl := $(foreach dir,$(EXTRA_INC),-isystem $(MINIOS_ROOT)/include/$(dir)) + +DEF_CPPFLAGS += -isystem $(MINIOS_ROOT)/include +DEF_CPPFLAGS += -D__MINIOS__ + +ifeq ($(libc),y) +DEF_CPPFLAGS += -DHAVE_LIBC +DEF_CPPFLAGS += -isystem $(MINIOS_ROOT)/include/posix +DEF_CPPFLAGS += -isystem $(XEN_ROOT)/tools/xenstore/include +endif + +ifneq ($(LWIPDIR),) +lwip=y +DEF_CPPFLAGS += -DHAVE_LWIP +DEF_CPPFLAGS += -isystem $(LWIPDIR)/src/include +DEF_CPPFLAGS += -isystem $(LWIPDIR)/src/include/ipv4 +endif + +# Set tools +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +ifeq ($(clang),y) +CC = $(CROSS_COMPILE)clang +LD_LTO = $(CROSS_COMPILE)llvm-ld +else +CC = $(CROSS_COMPILE)gcc +LD_LTO = $(CROSS_COMPILE)ld +endif +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +RANLIB = $(CROSS_COMPILE)ranlib +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +SIZEUTIL = $(CROSS_COMPILE)size + +# Allow git to be wrappered in the environment +GIT ?= git + +INSTALL = install +INSTALL_DIR = $(INSTALL) -d -m0755 -p +INSTALL_DATA = $(INSTALL) -m0644 -p +INSTALL_PROG = $(INSTALL) -m0755 -p + +BOOT_DIR ?= /boot + +SOCKET_LIBS = +UTIL_LIBS = -lutil +DLOPEN_LIBS = -ldl + +SONAME_LDFLAG = -soname +SHLIB_LDFLAGS = -shared + +ifneq ($(debug),y) +CFLAGS += -O2 -fomit-frame-pointer +else +# Less than -O1 produces bad code and large stack frames +CFLAGS += -O1 -fno-omit-frame-pointer +CFLAGS-$(gcc) += -fno-optimize-sibling-calls +endif + +ifeq ($(lto),y) +CFLAGS += -flto +LDFLAGS-$(clang) += -plugin LLVMgold.so +endif + +# When adding a new CONFIG_ option please make sure the test configurations +# under arch/*/testbuild/ are updated accordingly. Especially +# arch/*/testbuild/*-yes and arch/*/testbuild/*-no should set ALL possible +# CONFIG_ variables. 
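Editor's aside, not part of the patch: each CONFIG_FOO=y default below is turned into a -DCONFIG_FOO compiler flag by the DEFINES-$(CONFIG_...) lines that follow, so the C sources added elsewhere in this diff can select features at build time. A minimal sketch of the consuming side (init_optional_subsystems is a hypothetical caller; get_max_pages comes from balloon.c in this diff):

    /* Hypothetical caller, for illustration only. */
    static void init_optional_subsystems(void)
    {
    #ifdef CONFIG_BALLOON
        get_max_pages();    /* balloon.c: query XENMEM_maximum_reservation */
    #endif
    #ifdef CONFIG_BLKFRONT
        /* blkfront.c is linked in; init_blkfront() is available. */
    #endif
    }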
+ +# Configuration defaults +ifeq ($(TARGET_ARCH_FAM),x86) +CONFIG_PARAVIRT ?= y +else +CONFIG_PARAVIRT ?= n +endif +CONFIG_START_NETWORK ?= y +CONFIG_SPARSE_BSS ?= y +CONFIG_QEMU_XS_ARGS ?= n +CONFIG_TEST ?= n +CONFIG_PCIFRONT ?= n +CONFIG_BLKFRONT ?= y +CONFIG_TPMFRONT ?= n +CONFIG_TPM_TIS ?= n +CONFIG_TPMBACK ?= n +CONFIG_NETFRONT ?= y +CONFIG_FBFRONT ?= y +CONFIG_KBDFRONT ?= y +CONFIG_CONSFRONT ?= y +CONFIG_XENBUS ?= y +CONFIG_XC ?=y +CONFIG_LWIP ?= $(lwip) +CONFIG_BALLOON ?= n + +# Export config items as compiler directives +DEFINES-$(CONFIG_PARAVIRT) += -DCONFIG_PARAVIRT +DEFINES-$(CONFIG_START_NETWORK) += -DCONFIG_START_NETWORK +DEFINES-$(CONFIG_SPARSE_BSS) += -DCONFIG_SPARSE_BSS +DEFINES-$(CONFIG_QEMU_XS_ARGS) += -DCONFIG_QEMU_XS_ARGS +DEFINES-$(CONFIG_PCIFRONT) += -DCONFIG_PCIFRONT +DEFINES-$(CONFIG_BLKFRONT) += -DCONFIG_BLKFRONT +DEFINES-$(CONFIG_TPMFRONT) += -DCONFIG_TPMFRONT +DEFINES-$(CONFIG_TPM_TIS) += -DCONFIG_TPM_TIS +DEFINES-$(CONFIG_TPMBACK) += -DCONFIG_TPMBACK +DEFINES-$(CONFIG_NETFRONT) += -DCONFIG_NETFRONT +DEFINES-$(CONFIG_KBDFRONT) += -DCONFIG_KBDFRONT +DEFINES-$(CONFIG_FBFRONT) += -DCONFIG_FBFRONT +DEFINES-$(CONFIG_CONSFRONT) += -DCONFIG_CONSFRONT +DEFINES-$(CONFIG_XENBUS) += -DCONFIG_XENBUS +DEFINES-$(CONFIG_BALLOON) += -DCONFIG_BALLOON + +DEFINES-y += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) + +# Override settings for this OS +PTHREAD_LIBS = +nosharedlibs=y diff -Nru xen-4.9.0/extras/mini-os/console/console.c xen-4.9.2/extras/mini-os/console/console.c --- xen-4.9.0/extras/mini-os/console/console.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/console/console.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,164 @@ +/* + **************************************************************************** + * (C) 2006 - Grzegorz Milos - Cambridge University + **************************************************************************** + * + * File: console.h + * Author: Grzegorz Milos + * Changes: + * + * Date: Mar 2006 + * + * Environment: Xen Minimal OS + * Description: Console interface. + * + * Handles console I/O. Defines printk. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Copies all print output to the Xen emergency console in addition + to the standard dom0-handled console */ +#define USE_XEN_CONSOLE + + +/* If the console is not initialised, printk output is sent to the Xen serial line. + NOTE: you need to enable verbose in xen/Rules.mk for it to work. */ +static int console_initialised = 0; + +__attribute__((weak)) void console_input(char * buf, unsigned len) +{ + if(len > 0) + { + /* Just repeat what's written */ + buf[len] = '\0'; + printk("%s", buf); + + if(buf[len-1] == '\r') + printk("\nNo console input handler.\n"); + } +} + +#ifndef HAVE_LIBC +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) +{ + console_input(buf, len); +} + +void xencons_tx(void) +{ + /* Do nothing, handled by _rx */ +} +#endif + + +void console_print(struct consfront_dev *dev, char *data, int length) +{ + char *curr_char, saved_char; + char copied_str[length+1]; + char *copied_ptr; + int part_len; + int (*ring_send_fn)(struct consfront_dev *dev, const char *data, unsigned length); + + if(!console_initialised) + ring_send_fn = xencons_ring_send_no_notify; + else + ring_send_fn = xencons_ring_send; + + copied_ptr = copied_str; + memcpy(copied_ptr, data, length); + for(curr_char = copied_ptr; curr_char < copied_ptr+length-1; curr_char++) + { + if(*curr_char == '\n') + { + *curr_char = '\r'; + saved_char = *(curr_char+1); + *(curr_char+1) = '\n'; + part_len = curr_char - copied_ptr + 2; + ring_send_fn(dev, copied_ptr, part_len); + *(curr_char+1) = saved_char; + copied_ptr = curr_char+1; + length -= part_len - 1; + } + } + + if (copied_ptr[length-1] == '\n') { + copied_ptr[length-1] = '\r'; + copied_ptr[length] = '\n'; + length++; + } + + ring_send_fn(dev, copied_ptr, length); +} + +void print(int direct, const char *fmt, va_list args) +{ + static char buf[1024]; + + (void)vsnprintf(buf, sizeof(buf), fmt, args); + + if(direct) + { + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf); + return; + } else { +#ifndef USE_XEN_CONSOLE + if(!console_initialised) +#endif + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf); + + console_print(NULL, buf, strlen(buf)); + } +} + +void printk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + print(0, fmt, args); + va_end(args); +} + +void xprintk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + print(1, fmt, args); + va_end(args); +} +void init_console(void) +{ + printk("Initialising console ...
"); + xencons_ring_init(); + console_initialised = 1; + /* This is also required to notify the daemon */ + printk("done.\n"); +} diff -Nru xen-4.9.0/extras/mini-os/console/console.h xen-4.9.2/extras/mini-os/console/console.h --- xen-4.9.0/extras/mini-os/console/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/console/console.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,2 @@ + +void console_handle_input(evtchn_port_t port, struct pt_regs *regs, void *data); diff -Nru xen-4.9.0/extras/mini-os/console/xenbus.c xen-4.9.2/extras/mini-os/console/xenbus.c --- xen-4.9.0/extras/mini-os/console/xenbus.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/console/xenbus.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "console.h" + +void free_consfront(struct consfront_dev *dev) +{ + char* err = NULL; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/state") + 1]; + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("free_consfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("free_consfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + +close: + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err); + + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); + free(dev->backend); + free(dev->nodename); + + gnttab_end_access(dev->ring_ref); + + free_page(dev->ring); + free(dev); +} + +struct consfront_dev *init_consfront(char *_nodename) +{ + xenbus_transaction_t xbt; + char* err = NULL; + char* message=NULL; + int retry=0; + char* msg = NULL; + char nodename[256]; + char path[256]; + static int consfrontends = 3; + struct consfront_dev *dev; + int res; + + if (!_nodename) + snprintf(nodename, sizeof(nodename), "device/console/%d", consfrontends); + else { + strncpy(nodename, _nodename, sizeof(nodename) - 1); + nodename[sizeof(nodename) - 1] = 0; + } + + printk("******************* CONSFRONT for %s **********\n\n\n", nodename); + + consfrontends++; + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + if ((res = xenbus_read_integer(path)) < 0) + goto error; + else + dev->dom = res; + evtchn_alloc_unbound(dev->dom, console_handle_input, dev, &dev->evtchn); + + dev->ring = (struct xencons_interface *) alloc_page(); + memset(dev->ring, 0, PAGE_SIZE); + dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(dev->ring), 0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "ring-ref","%u", + dev->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "port", "%u", 
dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/state") + 1]; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + msg = NULL; + state = xenbus_read_integer(path); + while (msg == NULL && state < XenbusStateConnected) + msg = xenbus_wait_for_state_change(path, &state, &dev->events); + if (msg != NULL || state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("**************************\n"); + + return dev; + +error: + free(msg); + free(err); + free_consfront(dev); + return NULL; +} + +void fini_console(struct consfront_dev *dev) +{ + if (dev) free_consfront(dev); +} + diff -Nru xen-4.9.0/extras/mini-os/console/xencons_ring.c xen-4.9.2/extras/mini-os/console/xencons_ring.c --- xen-4.9.0/extras/mini-os/console/xencons_ring.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/console/xencons_ring.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "console.h" + +DECLARE_WAIT_QUEUE_HEAD(console_queue); + +static struct xencons_interface *console_ring; +uint32_t console_evtchn; + +#ifdef CONFIG_PARAVIRT +void get_console(void *p) +{ + start_info_t *si = p; + + console_ring = mfn_to_virt(si->console.domU.mfn); + console_evtchn = si->console.domU.evtchn; +} +#else +void get_console(void *p) +{ + uint64_t v = -1; + + hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); + console_evtchn = v; + + hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + console_ring = (struct xencons_interface *)map_frame_virt(v); +} +#endif + +static inline void notify_daemon(struct consfront_dev *dev) +{ + /* Use evtchn: this is called early, before irq is set up. */ + if (!dev) + notify_remote_via_evtchn(console_evtchn); + else + notify_remote_via_evtchn(dev->evtchn); +} + +static inline struct xencons_interface *xencons_interface(void) +{ + return console_evtchn ? 
console_ring : NULL; +} + +int xencons_ring_send_no_notify(struct consfront_dev *dev, const char *data, unsigned len) +{ + int sent = 0; + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + if (!intf) + return sent; + + cons = intf->out_cons; + prod = intf->out_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); + intf->out_prod = prod; + + return sent; +} + +int xencons_ring_send(struct consfront_dev *dev, const char *data, unsigned len) +{ + int sent; + + sent = xencons_ring_send_no_notify(dev, data, len); + notify_daemon(dev); + + return sent; +} + + + +void console_handle_input(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + struct consfront_dev *dev = (struct consfront_dev *) data; +#ifdef HAVE_LIBC + int fd = dev ? dev->fd : -1; + + if (fd != -1) + files[fd].read = 1; + + wake_up(&console_queue); +#else + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (cons != prod) { + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); + cons++; + } + + mb(); + intf->in_cons = cons; + + notify_daemon(dev); + + xencons_tx(); +#endif +} + +#ifdef HAVE_LIBC +int xencons_ring_avail(struct consfront_dev *dev) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + return prod - cons; +} + +int xencons_ring_recv(struct consfront_dev *dev, char *data, unsigned len) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + unsigned filled = 0; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (filled < len && cons + filled != prod) { + data[filled] = *(intf->in + MASK_XENCONS_IDX(cons + filled, intf->in)); + filled++; + } + + mb(); + intf->in_cons = cons + filled; + + notify_daemon(dev); + + return filled; +} +#endif + +struct consfront_dev *xencons_ring_init(void) +{ + int err; + struct consfront_dev *dev; + + if (!console_evtchn) + return 0; + + dev = malloc(sizeof(struct consfront_dev)); + memset(dev, 0, sizeof(struct consfront_dev)); + dev->nodename = "device/console"; + dev->dom = 0; + dev->backend = 0; + dev->ring_ref = 0; + +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + dev->evtchn = console_evtchn; + dev->ring = xencons_interface(); + + err = bind_evtchn(dev->evtchn, console_handle_input, dev); + if (err <= 0) { + printk("XEN console request chn bind failed %i\n", err); + free(dev); + return NULL; + } + unmask_evtchn(dev->evtchn); + + /* In case we have in-flight data after save/restore... */ + notify_daemon(dev); + + return dev; +} + +void xencons_resume(void) +{ + (void)xencons_ring_init(); +} + diff -Nru xen-4.9.0/extras/mini-os/COPYING xen-4.9.2/extras/mini-os/COPYING --- xen-4.9.0/extras/mini-os/COPYING 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/COPYING 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,36 @@ +Certain files in this directory are licensed by the GNU +General Public License version 2 (GPLv2). 
By default these +files are not built and linked into Mini-OS. Enabling them +will cause the whole work to become covered by the GPLv2. + +The current set of GPLv2 features is: +CONFIG_TPMFRONT +CONFIG_TPMBACK +CONFIG_TPM_TIS + +Do not use these if you do not want your Mini-OS build to become +GPL licensed! + +Copyright (c) 2009 Citrix Systems, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + diff -Nru xen-4.9.0/extras/mini-os/daytime.c xen-4.9.2/extras/mini-os/daytime.c --- xen-4.9.0/extras/mini-os/daytime.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/daytime.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,67 @@ +/* + * daytime.c: a simple network service based on lwIP and mini-os + * + * Tim Deegan , July 2007 + */ + +#include +#include +#include +#include +#include + +static char message[29]; + +void run_server(void *p) +{ + struct ip_addr listenaddr = { 0 }; + struct netconn *listener; + struct netconn *session; + struct timeval tv; + err_t rc; + + start_networking(); + + if (0) { + struct ip_addr ipaddr = { htonl(0x0a000001) }; + struct ip_addr netmask = { htonl(0xff000000) }; + struct ip_addr gw = { 0 }; + networking_set_addr(&ipaddr, &netmask, &gw); + } + + tprintk("Opening connection\n"); + + listener = netconn_new(NETCONN_TCP); + tprintk("Connection at %p\n", listener); + + rc = netconn_bind(listener, &listenaddr, 13); + if (rc != ERR_OK) { + tprintk("Failed to bind connection: %i\n", rc); + return; + } + + rc = netconn_listen(listener); + if (rc != ERR_OK) { + tprintk("Failed to listen on connection: %i\n", rc); + return; + } + + while (1) { + session = netconn_accept(listener); + if (session == NULL) + continue; + + gettimeofday(&tv, NULL); + sprintf(message, "%20lu.%6.6lu\n", tv.tv_sec, tv.tv_usec); + (void) netconn_write(session, message, strlen(message), NETCONN_COPY); + (void) netconn_disconnect(session); + (void) netconn_delete(session); + } +} + + +int app_main(void *p) +{ + create_thread("server", run_server, NULL); + return 0; +} diff -Nru xen-4.9.0/extras/mini-os/domain_config xen-4.9.2/extras/mini-os/domain_config --- xen-4.9.0/extras/mini-os/domain_config 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/domain_config 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,19 @@ +# -*- mode: python; -*-
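+# (Editor's note, not part of the upstream patch: with the xm toolstack
+# this file is used as "xm create domain_config", run from the directory
+# containing the mini-os.gz image named below.)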
+#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. +# You can use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +#---------------------------------------------------------------------------- +# Kernel image file. +kernel = "mini-os.gz" + +# Initial memory allocation (in megabytes) for the new domain. +memory = 32 + +# A name for your domain. All domains must have different names. +name = "Mini-OS" + +on_crash = 'destroy' diff -Nru xen-4.9.0/extras/mini-os/events.c xen-4.9.2/extras/mini-os/events.c --- xen-4.9.0/extras/mini-os/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/events.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,268 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: events.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with events received on event channels + * + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include + +#define NR_EVS 1024 + +/* This represents an event handler. Chaining or sharing is not allowed. */ +typedef struct _ev_action_t { + evtchn_handler_t handler; + void *data; + uint32_t count; +} ev_action_t; + +static ev_action_t ev_actions[NR_EVS]; +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *data); + +static unsigned long bound_ports[NR_EVS/(8*sizeof(unsigned long))]; + +void unbind_all_ports(void) +{ + int i; + int cpu = 0; + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + for ( i = 0; i < NR_EVS; i++ ) + { + if ( i == console_evtchn || i == xenbus_evtchn ) + continue; + + if ( test_and_clear_bit(i, bound_ports) ) + { + printk("port %d still bound!\n", i); + unbind_evtchn(i); + } + } + vcpu_info->evtchn_upcall_pending = 0; + vcpu_info->evtchn_pending_sel = 0; +} + +/* + * Demux events to different handlers.
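+ * (Editor's illustration, not part of the upstream patch: a typical
+ *  consumer registers a handler and unmasks the port,
+ *      bind_evtchn(port, my_handler, my_data);
+ *      unmask_evtchn(port);
+ *  after which do_event() below invokes my_handler(port, regs, my_data)
+ *  each time the event fires; my_handler and my_data are placeholders.)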
+ */ +int do_event(evtchn_port_t port, struct pt_regs *regs) +{ + ev_action_t *action; + + clear_evtchn(port); + + if ( port >= NR_EVS ) + { + printk("WARN: do_event(): Port number too large: %d\n", port); + return 1; + } + + action = &ev_actions[port]; + action->count++; + + /* call the handler */ + action->handler(port, regs, action->data); + + return 1; + +} + +evtchn_port_t bind_evtchn(evtchn_port_t port, evtchn_handler_t handler, + void *data) +{ + if ( ev_actions[port].handler != default_handler ) + printk("WARN: Handler for port %d already registered, replacing\n", + port); + + ev_actions[port].data = data; + wmb(); + ev_actions[port].handler = handler; + set_bit(port, bound_ports); + + return port; +} + +void unbind_evtchn(evtchn_port_t port ) +{ + struct evtchn_close close; + int rc; + + if ( ev_actions[port].handler == default_handler ) + printk("WARN: No handler for port %d when unbinding\n", port); + mask_evtchn(port); + clear_evtchn(port); + + ev_actions[port].handler = default_handler; + wmb(); + ev_actions[port].data = NULL; + clear_bit(port, bound_ports); + + close.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if ( rc ) + printk("WARN: close_port %d failed rc=%d. ignored\n", port, rc); +} + +evtchn_port_t bind_virq(uint32_t virq, evtchn_handler_t handler, void *data) +{ + evtchn_bind_virq_t op; + int rc; + + /* Try to bind the virq to a port */ + op.virq = virq; + op.vcpu = smp_processor_id(); + + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op); + if (rc != 0) + { + printk("Failed to bind virtual IRQ %d with rc=%d\n", virq, rc); + return -1; + } + bind_evtchn(op.port, handler, data); + return op.port; +} + +evtchn_port_t bind_pirq(uint32_t pirq, int will_share, + evtchn_handler_t handler, void *data) +{ + evtchn_bind_pirq_t op; + int rc; + + /* Try to bind the pirq to a port */ + op.pirq = pirq; + op.flags = will_share ? BIND_PIRQ__WILL_SHARE : 0; + + if ( (rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &op)) != 0 ) + { + printk("Failed to bind physical IRQ %d with rc=%d\n", pirq, rc); + return -1; + } + bind_evtchn(op.port, handler, data); + return op.port; +} + +/* + * Initially all events are without a handler and disabled + */ +void init_events(void) +{ + int i; + + /* initialize event handler */ + for ( i = 0; i < NR_EVS; i++ ) + { + ev_actions[i].handler = default_handler; + mask_evtchn(i); + } + + arch_init_events(); +} + +void fini_events(void) +{ + /* Dealloc all events */ + arch_unbind_ports(); + unbind_all_ports(); + arch_fini_events(); +} + +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *ignore) +{ + printk("[Port %d] - event received\n", port); +} + +/* Create a port available to the pal for exchanging notifications. + Returns the result of the hypervisor call. */ + +/* Unfortunate confusion of terminology: the port is unbound as far + as Xen is concerned, but we automatically bind a handler to it + from inside mini-os. */ + +int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler, + void *data, evtchn_port_t *port) +{ + int rc; + + evtchn_alloc_unbound_t op; + op.dom = DOMID_SELF; + op.remote_dom = pal; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); + if ( rc ) + { + printk("ERROR: alloc_unbound failed with rc=%d", rc); + return rc; + } + *port = bind_evtchn(op.port, handler, data); + return rc; +} + +/* Connect to a port so as to allow the exchange of notifications with + the pal. Returns the result of the hypervisor call. 
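+   (Editor's sketch, not part of the upstream patch: the usual pairing is
+       domA: evtchn_alloc_unbound(domB_id, handler, data, &port);
+             -- advertise port to domB, e.g. through xenstore --
+       domB: evtchn_bind_interdomain(domA_id, port, handler, data, &local);
+    after which notify_remote_via_evtchn() on either side raises the other
+    side's handler.)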
*/ + +int evtchn_bind_interdomain(domid_t pal, evtchn_port_t remote_port, + evtchn_handler_t handler, void *data, + evtchn_port_t *local_port) +{ + int rc; + evtchn_port_t port; + evtchn_bind_interdomain_t op; + op.remote_dom = pal; + op.remote_port = remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op); + if ( rc ) + { + printk("ERROR: bind_interdomain failed with rc=%d", rc); + return rc; + } + port = op.local_port; + *local_port = bind_evtchn(port, handler, data); + return rc; +} + +int evtchn_get_peercontext(evtchn_port_t local_port, char *ctx, int size) +{ + int rc; + uint32_t sid; + struct xen_flask_op op; + op.cmd = FLASK_GET_PEER_SID; + op.interface_version = XEN_FLASK_INTERFACE_VERSION; + op.u.peersid.evtchn = local_port; + rc = HYPERVISOR_xsm_op(&op); + if (rc) + return rc; + sid = op.u.peersid.sid; + op.cmd = FLASK_SID_TO_CONTEXT; + op.u.sid_context.sid = sid; + op.u.sid_context.size = size; + set_xen_guest_handle(op.u.sid_context.context, ctx); + rc = HYPERVISOR_xsm_op(&op); + return rc; +} + + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/fbfront.c xen-4.9.2/extras/mini-os/fbfront.c --- xen-4.9.0/extras/mini-os/fbfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/fbfront.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,710 @@ +/* + * Frame Buffer + Keyboard driver for Mini-OS. + * Samuel Thibault , 2008 + * Based on blkfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_WAIT_QUEUE_HEAD(kbdfront_queue); + + + + + + +struct kbdfront_dev { + domid_t dom; + + struct xenkbd_page *page; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void kbdfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct kbdfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&kbdfront_queue); +} + +static void free_kbdfront(struct kbdfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + free_page(dev->page); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); + free(dev); +} + +struct kbdfront_dev *init_kbdfront(char *_nodename, int abs_pointer) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct xenkbd_page *s; + int retry=0; + char* msg = NULL; + char* nodename = _nodename ? 
_nodename : "device/vkbd/0";
+    struct kbdfront_dev *dev;
+
+    char path[strlen(nodename) + strlen("/backend-id") + 1];
+
+    printk("******************* KBDFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    memset(dev, 0, sizeof(*dev));
+    dev->nodename = strdup(nodename);
+#ifdef HAVE_LIBC
+    dev->fd = -1;
+#endif
+
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    dev->dom = xenbus_read_integer(path);
+    evtchn_alloc_unbound(dev->dom, kbdfront_handler, dev, &dev->evtchn);
+
+    dev->page = s = (struct xenkbd_page*) alloc_page();
+    memset(s,0,PAGE_SIZE);
+
+    dev->events = NULL;
+
+    s->in_cons = s->in_prod = 0;
+    s->out_cons = s->out_prod = 0;
+
+again:
+    err = xenbus_transaction_start(&xbt);
+    if (err) {
+        printk("error starting transaction: %s\n", err);
+        free(err);
+    }
+
+    err = xenbus_printf(xbt, nodename, "page-ref","%lu", virt_to_mfn(s));
+    if (err) {
+        message = "writing page-ref";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
+    if (err) {
+        message = "writing event-channel";
+        goto abort_transaction;
+    }
+    if (abs_pointer) {
+        err = xenbus_printf(xbt, nodename, "request-abs-pointer", "1");
+        if (err) {
+            message = "writing request-abs-pointer";
+            goto abort_transaction;
+        }
+    }
+
+    snprintf(path, sizeof(path), "%s/state", nodename);
+    err = xenbus_switch_state(xbt, path, XenbusStateInitialised);
+    if (err) {
+        printk("error writing initialized: %s\n", err);
+        free(err);
+    }
+
+    err = xenbus_transaction_end(xbt, 0, &retry);
+    free(err);
+    if (retry)
+        goto again;
+
+    goto done;
+
+abort_transaction:
+    free(err);
+    err = xenbus_transaction_end(xbt, 1, &retry);
+    printk("Abort transaction %s\n", message);
+    goto error;
+
+done:
+
+    snprintf(path, sizeof(path), "%s/backend", nodename);
+    msg = xenbus_read(XBT_NIL, path, &dev->backend);
+    if (msg) {
+        printk("Error %s when reading the backend path %s\n", msg, path);
+        goto error;
+    }
+
+    printk("backend at %s\n", dev->backend);
+
+    {
+        XenbusState state;
+        char path[strlen(dev->backend) + strlen("/state") + 1];
+        char frontpath[strlen(nodename) + strlen("/state") + 1];
+
+        snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+        xenbus_watch_path_token(XBT_NIL, path, path, &dev->events);
+
+        err = NULL;
+        state = xenbus_read_integer(path);
+        while (err == NULL && state < XenbusStateConnected)
+            err = xenbus_wait_for_state_change(path, &state, &dev->events);
+        if (state != XenbusStateConnected) {
+            printk("backend not available, state=%d\n", state);
+            free(err);
+            err = xenbus_unwatch_path_token(XBT_NIL, path, path);
+            goto error;
+        }
+
+        printk("%s connected\n", dev->backend);
+
+        snprintf(frontpath, sizeof(frontpath), "%s/state", nodename);
+        if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected))
+            != NULL) {
+            printk("error switching state: %s\n", err);
+            free(err);
+            err = xenbus_unwatch_path_token(XBT_NIL, path, path);
+            goto error;
+        }
+    }
+    unmask_evtchn(dev->evtchn);
+
+    printk("************************** KBDFRONT\n");
+
+    return dev;
+error:
+    free(msg);
+    free(err);
+    free_kbdfront(dev);
+    return NULL;
+}
+
+int kbdfront_receive(struct kbdfront_dev *dev, union xenkbd_in_event *buf, int n)
+{
+    struct xenkbd_page *page = dev->page;
+    uint32_t prod, cons;
+    int i;
+
+#ifdef HAVE_LIBC
+    if (dev->fd != -1) {
+        files[dev->fd].read = 0;
+        mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
+    }
+#endif
+
+    prod = page->in_prod;
+
+    if (prod == page->in_cons)
+        return
0; + + rmb(); /* ensure we see ring contents up to prod */ + + for (i = 0, cons = page->in_cons; i < n && cons != prod; i++, cons++) + memcpy(buf + i, &XENKBD_IN_RING_REF(page, cons), sizeof(*buf)); + + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_evtchn(dev->evtchn); + +#ifdef HAVE_LIBC + if (cons != prod && dev->fd != -1) + /* still some events to read */ + files[dev->fd].read = 1; +#endif + + return i; +} + + +void shutdown_kbdfront(struct kbdfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/request-abs-pointer") + 1]; + + printk("close kbd: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_kbdfront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/page-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/request-abs-pointer", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_kbdfront(dev); +} + +#ifdef HAVE_LIBC +int kbdfront_open(struct kbdfront_dev *dev) +{ + dev->fd = alloc_fd(FTYPE_KBD); + printk("kbd_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].kbd.dev = dev; + return dev->fd; +} +#endif + + + + + +DECLARE_WAIT_QUEUE_HEAD(fbfront_queue); + + + + + + +struct fbfront_dev { + domid_t dom; + + struct xenfb_page *page; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + int request_update; + + int width; + int height; + int depth; + int stride; + int mem_length; + int offset; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct fbfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&fbfront_queue); +} + +static void free_fbfront(struct fbfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + free_page(dev->page); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); 
+    free(dev);
+}
+
+int fbfront_receive(struct fbfront_dev *dev, union xenfb_in_event *buf, int n)
+{
+    struct xenfb_page *page = dev->page;
+    uint32_t prod, cons;
+    int i;
+
+#ifdef HAVE_LIBC
+    if (dev->fd != -1) {
+        files[dev->fd].read = 0;
+        mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
+    }
+#endif
+
+    prod = page->in_prod;
+
+    if (prod == page->in_cons)
+        return 0;
+
+    rmb(); /* ensure we see ring contents up to prod */
+
+    for (i = 0, cons = page->in_cons; i < n && cons != prod; i++, cons++)
+        memcpy(buf + i, &XENFB_IN_RING_REF(page, cons), sizeof(*buf));
+
+    mb(); /* ensure we got ring contents */
+    page->in_cons = cons;
+    notify_remote_via_evtchn(dev->evtchn);
+
+#ifdef HAVE_LIBC
+    if (cons != prod && dev->fd != -1)
+        /* still some events to read */
+        files[dev->fd].read = 1;
+#endif
+
+    return i;
+}
+
+struct fbfront_dev *init_fbfront(char *_nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n)
+{
+    xenbus_transaction_t xbt;
+    char* err;
+    char* message=NULL;
+    struct xenfb_page *s;
+    int retry=0;
+    char* msg=NULL;
+    int i, j;
+    struct fbfront_dev *dev;
+    int max_pd;
+    unsigned long mapped;
+    char* nodename = _nodename ? _nodename : "device/vfb/0";
+
+    char path[strlen(nodename) + strlen("/backend-id") + 1];
+
+    printk("******************* FBFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    memset(dev, 0, sizeof(*dev));
+    dev->nodename = strdup(nodename);
+#ifdef HAVE_LIBC
+    dev->fd = -1;
+#endif
+
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    dev->dom = xenbus_read_integer(path);
+    evtchn_alloc_unbound(dev->dom, fbfront_handler, dev, &dev->evtchn);
+
+    dev->page = s = (struct xenfb_page*) alloc_page();
+    memset(s,0,PAGE_SIZE);
+
+    s->in_cons = s->in_prod = 0;
+    s->out_cons = s->out_prod = 0;
+    dev->width = s->width = width;
+    dev->height = s->height = height;
+    dev->depth = s->depth = depth;
+    dev->stride = s->line_length = stride;
+    dev->mem_length = s->mem_length = n * PAGE_SIZE;
+    dev->offset = 0;
+    dev->events = NULL;
+
+    max_pd = sizeof(s->pd) / sizeof(s->pd[0]);
+    mapped = 0;
+
+    for (i = 0; mapped < n && i < max_pd; i++) {
+        unsigned long *pd = (unsigned long *) alloc_page();
+        for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++)
+            pd[j] = mfns[mapped++];
+        for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
+            pd[j] = 0;
+        s->pd[i] = virt_to_mfn(pd);
+    }
+    for ( ; i < max_pd; i++)
+        s->pd[i] = 0;
+
+again:
+    err = xenbus_transaction_start(&xbt);
+    if (err) {
+        printk("error starting transaction: %s\n", err);
+        free(err);
+    }
+
+    err = xenbus_printf(xbt, nodename, "page-ref","%lu", virt_to_mfn(s));
+    if (err) {
+        message = "writing page-ref";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
+    if (err) {
+        message = "writing event-channel";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename, "protocol", "%s",
+                        XEN_IO_PROTO_ABI_NATIVE);
+    if (err) {
+        message = "writing protocol";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename, "feature-update", "1");
+    if (err) {
+        message = "writing feature-update";
+        goto abort_transaction;
+    }
+
+    snprintf(path, sizeof(path), "%s/state", nodename);
+    err = xenbus_switch_state(xbt, path, XenbusStateInitialised);
+    if (err) {
+        message = "switching state";
+        goto abort_transaction;
+    }
+
+    err = xenbus_transaction_end(xbt, 0, &retry);
+    free(err);
+    if (retry)
+        goto again;
+
+    goto done;
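+
+/*
+ * The labels below are reached only via goto: abort_transaction ends the
+ * xenbus transaction with abort set and bails out, while done carries on
+ * with the backend handshake.
+ */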
+ +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/request-update") + 1]; + char frontpath[strlen(nodename) + strlen("/state") + 1]; + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + printk("%s connected\n", dev->backend); + + snprintf(path, sizeof(path), "%s/request-update", dev->backend); + dev->request_update = xenbus_read_integer(path); + + snprintf(frontpath, sizeof(frontpath), "%s/state", nodename); + if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) + != NULL) { + printk("error switching state: %s\n", err); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("************************** FBFRONT\n"); + + return dev; + +error: + free(msg); + free(err); + free_fbfront(dev); + return NULL; +} + +static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event *event) +{ + struct xenfb_page *page = dev->page; + uint32_t prod; + DEFINE_WAIT(w); + + add_waiter(w, fbfront_queue); + while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN) + schedule(); + remove_waiter(w, fbfront_queue); + + prod = page->out_prod; + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(page, prod) = *event; + wmb(); /* ensure ring contents visible */ + page->out_prod = prod + 1; + notify_remote_via_evtchn(dev->evtchn); +} + +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height) +{ + struct xenfb_update update; + + if (dev->request_update <= 0) + return; + + if (x < 0) { + width += x; + x = 0; + } + if (x + width > dev->width) + width = dev->width - x; + + if (y < 0) { + height += y; + y = 0; + } + if (y + height > dev->height) + height = dev->height - y; + + if (width <= 0 || height <= 0) + return; + + update.type = XENFB_TYPE_UPDATE; + update.x = x; + update.y = y; + update.width = width; + update.height = height; + fbfront_out_event(dev, (union xenfb_out_event *) &update); +} + +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset) +{ + struct xenfb_resize resize; + + resize.type = XENFB_TYPE_RESIZE; + dev->width = resize.width = width; + dev->height = resize.height = height; + dev->stride = resize.stride = stride; + dev->depth = resize.depth = depth; + dev->offset = resize.offset = offset; + fbfront_out_event(dev, (union xenfb_out_event *) &resize); +} + +void shutdown_fbfront(struct fbfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/feature-update") + 1]; + + printk("close fb: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), 
"%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + if (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_fbfront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/page-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/protocol", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/feature-update", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_fbfront(dev); +} + +#ifdef HAVE_LIBC +int fbfront_open(struct fbfront_dev *dev) +{ + dev->fd = alloc_fd(FTYPE_FB); + printk("fb_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].fb.dev = dev; + return dev->fd; +} +#endif + diff -Nru xen-4.9.0/extras/mini-os/.gitignore xen-4.9.2/extras/mini-os/.gitignore --- xen-4.9.0/extras/mini-os/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/.gitignore 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,16 @@ +*~ +*.o +*.a +*.swp +cscope.* +GPATH +GRTAGS +GTAGS +TAGS +tags + +arch/x86/minios-x86*.lds +include/list.h +mini-os +mini-os.gz +minios-config.mk diff -Nru xen-4.9.0/extras/mini-os/gntmap.c xen-4.9.2/extras/mini-os/gntmap.c --- xen-4.9.0/extras/mini-os/gntmap.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/gntmap.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,250 @@ +/* + * Manages grant mappings from other domains. + * + * Diego Ongaro , July 2008 + * + * Files of type FTYPE_GNTMAP contain a gntmap, which is an array of + * (host address, grant handle) pairs. Grant handles come from a hypervisor map + * operation and are needed for the corresponding unmap. + * + * This is a rather naive implementation in terms of performance. If we start + * using it frequently, there's definitely some low-hanging fruit here. 
+ * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +//#define GNTMAP_DEBUG +#ifdef GNTMAP_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(gntmap.c:%d): %s" _f "\n", __LINE__, __func__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + + +#define DEFAULT_MAX_GRANTS 128 + +struct gntmap_entry { + unsigned long host_addr; + grant_handle_t handle; +}; + +static inline int +gntmap_entry_used(struct gntmap_entry *entry) +{ + return entry->host_addr != 0; +} + +static struct gntmap_entry* +gntmap_find_free_entry(struct gntmap *map) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (!gntmap_entry_used(&map->entries[i])) + return &map->entries[i]; + } + + DEBUG("(map=%p): all %d entries full", + map, map->nentries); + return NULL; +} + +static struct gntmap_entry* +gntmap_find_entry(struct gntmap *map, unsigned long addr) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (map->entries[i].host_addr == addr) + return &map->entries[i]; + } + return NULL; +} + +int +gntmap_set_max_grants(struct gntmap *map, int count) +{ + DEBUG("(map=%p, count=%d)", map, count); + + if (map->nentries != 0) + return -EBUSY; + + map->entries = xmalloc_array(struct gntmap_entry, count); + if (map->entries == NULL) + return -ENOMEM; + + memset(map->entries, 0, sizeof(struct gntmap_entry) * count); + map->nentries = count; + return 0; +} + +static int +_gntmap_map_grant_ref(struct gntmap_entry *entry, + unsigned long host_addr, + uint32_t domid, + uint32_t ref, + int writable) +{ + struct gnttab_map_grant_ref op; + int rc; + + op.ref = (grant_ref_t) ref; + op.dom = (domid_t) domid; + op.host_addr = (uint64_t) host_addr; + op.flags = GNTMAP_host_map; + if (!writable) + op.flags |= GNTMAP_readonly; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_map_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? 
rc : op.status; + } + + entry->host_addr = host_addr; + entry->handle = op.handle; + return 0; +} + +static int +_gntmap_unmap_grant_ref(struct gntmap_entry *entry) +{ + struct gnttab_unmap_grant_ref op; + int rc; + + op.host_addr = (uint64_t) entry->host_addr; + op.dev_bus_addr = 0; + op.handle = entry->handle; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_unmap_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? rc : op.status; + } + + entry->host_addr = 0; + return 0; +} + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count) +{ + int i, rc; + struct gntmap_entry *ent; + + DEBUG("(map=%p, start_address=%lx, count=%d)", + map, start_address, count); + + for (i = 0; i < count; i++) { + ent = gntmap_find_entry(map, start_address + PAGE_SIZE * i); + if (ent == NULL) { + printk("gntmap: tried to munmap unknown page\n"); + return -EINVAL; + } + + rc = _gntmap_unmap_grant_ref(ent); + if (rc != 0) + return rc; + } + + return 0; +} + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable) +{ + unsigned long addr; + struct gntmap_entry *ent; + int i; + + DEBUG("(map=%p, count=%" PRIu32 ", " + "domids=%p [%" PRIu32 "...], domids_stride=%d, " + "refs=%p [%" PRIu32 "...], writable=%d)", + map, count, + domids, domids == NULL ? 0 : domids[0], domids_stride, + refs, refs == NULL ? 0 : refs[0], writable); + + (void) gntmap_set_max_grants(map, DEFAULT_MAX_GRANTS); + + addr = allocate_ondemand((unsigned long) count, 1); + if (addr == 0) + return NULL; + + for (i = 0; i < count; i++) { + ent = gntmap_find_free_entry(map); + if (ent == NULL || + _gntmap_map_grant_ref(ent, + addr + PAGE_SIZE * i, + domids[i * domids_stride], + refs[i], + writable) != 0) { + + (void) gntmap_munmap(map, addr, i); + return NULL; + } + } + + return (void*) addr; +} + +void +gntmap_init(struct gntmap *map) +{ + DEBUG("(map=%p)", map); + map->nentries = 0; + map->entries = NULL; +} + +void +gntmap_fini(struct gntmap *map) +{ + struct gntmap_entry *ent; + int i; + + DEBUG("(map=%p)", map); + + for (i = 0; i < map->nentries; i++) { + ent = &map->entries[i]; + if (gntmap_entry_used(ent)) + (void) _gntmap_unmap_grant_ref(ent); + } + + xfree(map->entries); + map->entries = NULL; + map->nentries = 0; +} diff -Nru xen-4.9.0/extras/mini-os/gnttab.c xen-4.9.2/extras/mini-os/gnttab.c --- xen-4.9.0/extras/mini-os/gnttab.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/gnttab.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,196 @@ +/* + **************************************************************************** + * (C) 2006 - Cambridge University + **************************************************************************** + * + * File: gnttab.c + * Author: Steven Smith (sos22@cam.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: July 2006 + * + * Environment: Xen Minimal OS + * Description: Simple grant tables implementation. About as stupid as it's + * possible to be and still work. 
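+ *              The free list is threaded through gnttab_list[]:
+ *              gnttab_list[0] is the index of the first free entry and
+ *              gnttab_list[ref] that of the next one after ref; entries
+ *              below NR_RESERVED_ENTRIES are never handed out.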
+ * + **************************************************************************** + */ +#include +#include +#include +#include + +#define NR_RESERVED_ENTRIES 8 + +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ +#define NR_GRANT_FRAMES 4 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_v1_t)) + +static grant_entry_v1_t *gnttab_table; +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +#ifdef GNT_DEBUG +static char inuse[NR_GRANT_ENTRIES]; +#endif +static __DECLARE_SEMAPHORE_GENERIC(gnttab_sem, 0); + +static void +put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + local_irq_save(flags); +#ifdef GNT_DEBUG + BUG_ON(!inuse[ref]); + inuse[ref] = 0; +#endif + gnttab_list[ref] = gnttab_list[0]; + gnttab_list[0] = ref; + local_irq_restore(flags); + up(&gnttab_sem); +} + +static grant_ref_t +get_free_entry(void) +{ + unsigned int ref; + unsigned long flags; + down(&gnttab_sem); + local_irq_save(flags); + ref = gnttab_list[0]; + BUG_ON(ref < NR_RESERVED_ENTRIES || ref >= NR_GRANT_ENTRIES); + gnttab_list[0] = gnttab_list[ref]; +#ifdef GNT_DEBUG + BUG_ON(inuse[ref]); + inuse[ref] = 1; +#endif + local_irq_restore(flags); + return ref; +} + +grant_ref_t +gnttab_grant_access(domid_t domid, unsigned long frame, int readonly) +{ + grant_ref_t ref; + + ref = get_free_entry(); + gnttab_table[ref].frame = frame; + gnttab_table[ref].domid = domid; + wmb(); + readonly *= GTF_readonly; + gnttab_table[ref].flags = GTF_permit_access | readonly; + + return ref; +} + +grant_ref_t +gnttab_grant_transfer(domid_t domid, unsigned long pfn) +{ + grant_ref_t ref; + + ref = get_free_entry(); + gnttab_table[ref].frame = pfn; + gnttab_table[ref].domid = domid; + wmb(); + gnttab_table[ref].flags = GTF_accept_transfer; + + return ref; +} + +int +gnttab_end_access(grant_ref_t ref) +{ + uint16_t flags, nflags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + + nflags = gnttab_table[ref].flags; + do { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { + printk("WARNING: g.e. still in use! (%x)\n", flags); + return 0; + } + } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) != + flags); + + put_free_entry(ref); + return 1; +} + +unsigned long +gnttab_end_transfer(grant_ref_t ref) +{ + unsigned long frame; + uint16_t flags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + + while (!((flags = gnttab_table[ref].flags) & GTF_transfer_committed)) { + if (synch_cmpxchg(&gnttab_table[ref].flags, flags, 0) == flags) { + printk("Release unused transfer grant.\n"); + put_free_entry(ref); + return 0; + } + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = gnttab_table[ref].flags; + } + + /* Read the frame number /after/ reading completion status. 
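+       (GTF_transfer_completed guarantees the frame field is valid; the
+       rmb() below keeps the two reads in that order.)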
*/ + rmb(); + frame = gnttab_table[ref].frame; + + put_free_entry(ref); + + return frame; +} + +grant_ref_t +gnttab_alloc_and_grant(void **map) +{ + unsigned long mfn; + grant_ref_t gref; + + *map = (void *)alloc_page(); + mfn = virt_to_mfn(*map); + gref = gnttab_grant_access(0, mfn, 0); + return gref; +} + +static const char * const gnttabop_error_msgs[] = GNTTABOP_error_msgs; + +const char * +gnttabop_error(int16_t status) +{ + status = -status; + if (status < 0 || status >= ARRAY_SIZE(gnttabop_error_msgs)) + return "bad status"; + else + return gnttabop_error_msgs[status]; +} + +void +init_gnttab(void) +{ + int i; + +#ifdef GNT_DEBUG + memset(inuse, 1, sizeof(inuse)); +#endif + for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) + put_free_entry(i); + + gnttab_table = arch_init_gnttab(NR_GRANT_FRAMES); + printk("gnttab_table mapped at %p.\n", gnttab_table); +} + +void +fini_gnttab(void) +{ + struct gnttab_setup_table setup; + + setup.dom = DOMID_SELF; + setup.nr_frames = 0; + + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); +} diff -Nru xen-4.9.0/extras/mini-os/hypervisor.c xen-4.9.2/extras/mini-os/hypervisor.c --- xen-4.9.0/extras/mini-os/hypervisor.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/hypervisor.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,176 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002-2003, K A Fraser + * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk,Intel Research Cambridge + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) + +int in_callback; + +#ifndef CONFIG_PARAVIRT +extern shared_info_t shared_info; + +int hvm_get_parameter(int idx, uint64_t *value) +{ + struct xen_hvm_param xhv; + int ret; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if ( ret < 0 ) + BUG(); + + *value = xhv.value; + return ret; +} + +int hvm_set_parameter(int idx, uint64_t value) +{ + struct xen_hvm_param xhv; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + xhv.value = value; + return HYPERVISOR_hvm_op(HVMOP_set_param, &xhv); +} + +shared_info_t *map_shared_info(void *p) +{ + struct xen_add_to_physmap xatp; + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = virt_to_pfn(&shared_info); + if ( HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) + BUG(); + + return &shared_info; +} +#endif + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long l1, l2, l1i, l2i; + unsigned int port; + int cpu = 0; + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + in_callback = 1; + + vcpu_info->evtchn_upcall_pending = 0; + /* NB x86. No need for a barrier here -- XCHG is a barrier on x86. */ +#if !defined(__i386__) && !defined(__x86_64__) + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + while ( l1 != 0 ) + { + l1i = __ffs(l1); + l1 &= ~(1UL << l1i); + + while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 ) + { + l2i = __ffs(l2); + l2 &= ~(1UL << l2i); + + port = (l1i * (sizeof(unsigned long) * 8)) + l2i; + do_event(port, regs); + } + } + + in_callback = 0; +} + +void force_evtchn_callback(void) +{ +#ifdef XEN_HAVE_PV_UPCALL_MASK + int save; +#endif + vcpu_info_t *vcpu; + vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; +#ifdef XEN_HAVE_PV_UPCALL_MASK + save = vcpu->evtchn_upcall_mask; +#endif + + while (vcpu->evtchn_upcall_pending) { +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = 1; +#endif + barrier(); + do_hypervisor_callback(NULL); + barrier(); +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = save; + barrier(); +#endif + }; +} + +inline void mask_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} + +inline void unmask_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[smp_processor_id()]; + + synch_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just like + * a real IO-APIC we 'lose the interrupt edge' if the channel is masked. 
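+     * So, after unmasking, if the port is still pending and its selector
+     * bit was not already set, raise the upcall by hand via
+     * force_evtchn_callback().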
+ */ + if ( synch_test_bit (port, &s->evtchn_pending[0]) && + !synch_test_and_set_bit(port / (sizeof(unsigned long) * 8), + &vcpu_info->evtchn_pending_sel) ) + { + vcpu_info->evtchn_upcall_pending = 1; +#ifdef XEN_HAVE_PV_UPCALL_MASK + if ( !vcpu_info->evtchn_upcall_mask ) +#endif + force_evtchn_callback(); + } +} + +inline void clear_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} diff -Nru xen-4.9.0/extras/mini-os/include/arch/cc.h xen-4.9.2/extras/mini-os/include/arch/cc.h --- xen-4.9.0/extras/mini-os/include/arch/cc.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arch/cc.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,87 @@ +/* + * lwip/arch/cc.h + * + * Compiler-specific types and macros for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_CC_H__ +#define __LWIP_ARCH_CC_H__ + +/* Typedefs for the types used by lwip - */ +#include +#include +#include +typedef uint8_t u8_t; +typedef int8_t s8_t; +typedef uint16_t u16_t; +typedef int16_t s16_t; +typedef uint32_t u32_t; +typedef int32_t s32_t; +typedef uint64_t u64_t; +typedef int64_t s64_t; +typedef uintptr_t mem_ptr_t; + +typedef uint16_t u_short; + +/* Compiler hints for packing lwip's structures - */ +#define PACK_STRUCT_FIELD(_x) _x +#define PACK_STRUCT_STRUCT __attribute__ ((packed)) +#define PACK_STRUCT_BEGIN +#define PACK_STRUCT_END + +/* Platform specific diagnostic output - */ + +extern void lwip_printk(char *fmt, ...); +#define LWIP_PLATFORM_DIAG(_x) do { lwip_printk _x ; } while (0) + +extern void lwip_die(char *fmt, ...); +#define LWIP_PLATFORM_ASSERT(_x) do { lwip_die(_x); } while(0) + +/* "lightweight" synchronization mechanisms - */ +/* SYS_ARCH_DECL_PROTECT(x) - declare a protection state variable. */ +/* SYS_ARCH_PROTECT(x) - enter protection mode. */ +/* SYS_ARCH_UNPROTECT(x) - leave protection mode. */ + +/* If the compiler does not provide memset() this file must include a */ +/* definition of it, or include a file which defines it. */ +#include + +/* This file must either include a system-local which defines */ +/* the standard *nix error codes, or it should #define LWIP_PROVIDE_ERRNO */ +/* to make lwip/arch.h define the codes which are used throughout. 
*/ +#include + +/* Not required by the docs, but needed for network-order calculations */ +#ifdef HAVE_LIBC +#include +#ifndef BIG_ENDIAN +#error endian.h does not define byte order +#endif +#else +#include +#endif + +#include +#define S16_F PRIi16 +#define U16_F PRIu16 +#define X16_F PRIx16 +#define S32_F PRIi32 +#define U32_F PRIu32 +#define X32_F PRIx32 + +#if 0 +#ifndef DBG_ON +#define DBG_ON LWIP_DBG_ON +#endif +#define LWIP_DEBUG DBG_ON +//#define IP_DEBUG DBG_ON +#define TCP_DEBUG DBG_ON +#define TCP_INPUT_DEBUG DBG_ON +#define TCP_QLEN_DEBUG DBG_ON +#define TCPIP_DEBUG DBG_ON +#define DBG_TYPES_ON DBG_ON +#endif + +#endif /* __LWIP_ARCH_CC_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/arch/perf.h xen-4.9.2/extras/mini-os/include/arch/perf.h --- xen-4.9.0/extras/mini-os/include/arch/perf.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arch/perf.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,15 @@ +/* + * lwip/arch/perf.h + * + * Arch-specific performance measurement for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_PERF_H__ +#define __LWIP_ARCH_PERF_H__ + +#define PERF_START do { } while(0) +#define PERF_STOP(_x) do { (void)(_x); } while (0) + +#endif /* __LWIP_ARCH_PERF_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/arch/sys_arch.h xen-4.9.2/extras/mini-os/include/arch/sys_arch.h --- xen-4.9.0/extras/mini-os/include/arch/sys_arch.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arch/sys_arch.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * lwip/arch/sys_arch.h + * + * Arch-specific semaphores and mailboxes for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_SYS_ARCH_H__ +#define __LWIP_ARCH_SYS_ARCH_H__ + +#include +#include +#include + +typedef struct semaphore *sys_sem_t; +#define SYS_SEM_NULL ((sys_sem_t) NULL) + +struct mbox { + int count; + void **messages; + struct semaphore read_sem; + struct semaphore write_sem; + int writer; + int reader; +}; + +typedef struct mbox *sys_mbox_t; +#define SYS_MBOX_NULL ((sys_mbox_t) 0) + +typedef struct thread *sys_thread_t; + +typedef unsigned long sys_prot_t; + +#endif /*__LWIP_ARCH_SYS_ARCH_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/arm/arch_endian.h xen-4.9.2/extras/mini-os/include/arm/arch_endian.h --- xen-4.9.0/extras/mini-os/include/arm/arch_endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arch_endian.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef ARCH_ENDIAN_H +#error "Do not include arch_endian by itself, include endian.h" +#else + +#define __BYTE_ORDER __LITTLE_ENDIAN + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/arm/arch_limits.h xen-4.9.2/extras/mini-os/include/arm/arch_limits.h --- xen-4.9.0/extras/mini-os/include/arm/arch_limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arch_limits.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,9 @@ +#ifndef __ARCH_LIMITS_H__ +#define __ARCH_LIMITS_H__ + +#include + +#define __STACK_SIZE_PAGE_ORDER 2 +#define __STACK_SIZE (4 * PAGE_SIZE) + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/arm/arch_mm.h xen-4.9.2/extras/mini-os/include/arm/arch_mm.h --- xen-4.9.0/extras/mini-os/include/arm/arch_mm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arch_mm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,37 @@ +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +typedef uint64_t paddr_t; + +extern char _text, _etext, _erodata, 
_edata, _end, __bss_start; +extern int _boot_stack[]; +extern int _boot_stack_end[]; +extern uint32_t physical_address_offset; /* Add this to a virtual address to get the physical address (wraps at 4GB) */ + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define L1_PAGETABLE_SHIFT 12 + +#define L1_PROT 0 + +#define to_phys(x) (((paddr_t)(x)+physical_address_offset) & 0xffffffff) +#define to_virt(x) ((void *)(((x)-physical_address_offset) & 0xffffffff)) + +#define PFN_UP(x) (unsigned long)(((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) (unsigned long)((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((uint64_t)(x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) (unsigned long)((x) >> L1_PAGETABLE_SHIFT) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(PFN_PHYS(_mfn))) +#define pfn_to_virt(_pfn) (to_virt(PFN_PHYS(_pfn))) + +#define virtual_to_mfn(_virt) virt_to_mfn(_virt) + +// FIXME +#define map_frames(f, n) (NULL) + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/arm/arch_sched.h xen-4.9.2/extras/mini-os/include/arm/arch_sched.h --- xen-4.9.0/extras/mini-os/include/arm/arch_sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arch_sched.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,19 @@ +#ifndef __ARCH_SCHED_H__ +#define __ARCH_SCHED_H__ + +#include "arch_limits.h" + +static inline struct thread* get_current(void) +{ + struct thread **current; + unsigned long sp; + __asm__ __volatile__ ("mov %0, sp":"=r"(sp)); + current = (void *)(unsigned long)(sp & ~(__STACK_SIZE-1)); + return *current; +} + +void __arch_switch_threads(unsigned long *prevctx, unsigned long *nextctx); + +#define arch_switch_threads(prev,next) __arch_switch_threads(&(prev)->sp, &(next)->sp) + +#endif /* __ARCH_SCHED_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/arm/arch_spinlock.h xen-4.9.2/extras/mini-os/include/arm/arch_spinlock.h --- xen-4.9.0/extras/mini-os/include/arm/arch_spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arch_spinlock.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,36 @@ +#ifndef __ARCH_ASM_SPINLOCK_H +#define __ARCH_ASM_SPINLOCK_H + +#include "os.h" + +#define ARCH_SPIN_LOCK_UNLOCKED { 1 } + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define arch_spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + xchg(&lock->slock, 1); +} + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + return xchg(&lock->slock, 0) != 0 ? 1 : 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ + volatile int was_locked; + do { + was_locked = xchg(&lock->slock, 0) == 0 ? 
1 : 0; + } while(was_locked); +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/arm/arm32/arch_wordsize.h xen-4.9.2/extras/mini-os/include/arm/arm32/arch_wordsize.h --- xen-4.9.0/extras/mini-os/include/arm/arm32/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/arm32/arch_wordsize.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1 @@ +#define __WORDSIZE 32 diff -Nru xen-4.9.0/extras/mini-os/include/arm/asm_macros.h xen-4.9.2/extras/mini-os/include/arm/asm_macros.h --- xen-4.9.0/extras/mini-os/include/arm/asm_macros.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/asm_macros.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,14 @@ +#ifndef _ARM_ASM_MACRO_H_ +#define _ARM_ASM_MACRO_H_ + +#endif /* _ARM_ASM_MACRO_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/arm/gic.h xen-4.9.2/extras/mini-os/include/arm/gic.h --- xen-4.9.0/extras/mini-os/include/arm/gic.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/gic.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1 @@ +void gic_init(void); diff -Nru xen-4.9.0/extras/mini-os/include/arm/hypercall-arm.h xen-4.9.2/extras/mini-os/include/arm/hypercall-arm.h --- xen-4.9.0/extras/mini-os/include/arm/hypercall-arm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/hypercall-arm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,98 @@ +/****************************************************************************** + * hypercall-arm.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_ARM_H__ +#define __HYPERCALL_ARM_H__ + +#include +#include +#include +#include + +int +HYPERVISOR_sched_op( + int cmd, void *arg); + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown shutdown = { .reason = reason }; + HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown); +} + +int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg); + +int +HYPERVISOR_event_channel_op( + int cmd, void *op); + +int +HYPERVISOR_xen_version( + int cmd, void *arg); + +int +HYPERVISOR_console_io( + int cmd, int count, char *str); + +int +HYPERVISOR_physdev_op( + void *physdev_op); + +int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count); + +int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args); + +int +HYPERVISOR_sysctl( + unsigned long op); + +int +HYPERVISOR_domctl( + unsigned long op); + +int +HYPERVISOR_hvm_op( + unsigned long op, void *arg); + +int +HYPERVISOR_xsm_op( + struct xen_flask_op *); + +#endif /* __HYPERCALL_ARM_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/arm/os.h xen-4.9.2/extras/mini-os/include/arm/os.h --- xen-4.9.0/extras/mini-os/include/arm/os.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/os.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,216 @@ +#ifndef _OS_H_ +#define _OS_H_ + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +void arch_fini(void); +void timer_handler(evtchn_port_t port, struct pt_regs *regs, void *ign); + +extern void *device_tree; + +#define BUG() while(1){asm volatile (".word 0xe7f000f0\n");} /* Undefined instruction; will call our fault handler. */ + +#define smp_processor_id() 0 + +#define barrier() __asm__ __volatile__("": : :"memory") + +extern shared_info_t *HYPERVISOR_shared_info; + +// disable interrupts +static inline void local_irq_disable(void) { + __asm__ __volatile__("cpsid i":::"memory"); +} + +// enable interrupts +static inline void local_irq_enable(void) { + __asm__ __volatile__("cpsie i":::"memory"); +} + +#define local_irq_save(x) { \ + __asm__ __volatile__("mrs %0, cpsr;cpsid i":"=r"(x)::"memory"); \ +} + +#define local_irq_restore(x) { \ + __asm__ __volatile__("msr cpsr_c, %0"::"r"(x):"memory"); \ +} + +#define local_save_flags(x) { \ + __asm__ __volatile__("mrs %0, cpsr":"=r"(x)::"memory"); \ +} + +static inline int irqs_disabled(void) { + int x; + local_save_flags(x); + return x & 0x80; +} + +/* We probably only need "dmb" here, but we'll start by being paranoid. */ +#define mb() __asm__("dsb":::"memory"); +#define rmb() __asm__("dsb":::"memory"); +#define wmb() __asm__("dsb":::"memory"); + +/************************** arm *******************************/ +#ifdef __INSIDE_MINIOS__ +#if defined (__arm__) +#define xchg(ptr,v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST) + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + * + * This operation is atomic. + * If you need a memory barrier, use synch_test_and_clear_bit instead. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + uint8_t *byte = ((uint8_t *)addr) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_RELAXED); + + return (orig & bit) != 0; +} + +/** + * Atomically set a bit and return the old value. + * Similar to test_and_clear_bit. 
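+ * Returns nonzero if the bit was already set, zero otherwise.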
+ */ +static __inline__ int test_and_set_bit(int nr, volatile void *base) +{ + uint8_t *byte = ((uint8_t *)base) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_or(byte, bit, __ATOMIC_RELAXED); + + return (orig & bit) != 0; +} + +/** + * Test whether a bit is set. */ +static __inline__ int test_bit(int nr, const volatile unsigned long *addr) +{ + const uint8_t *ptr = (const uint8_t *) addr; + return ((1 << (nr & 7)) & (ptr[nr >> 3])) != 0; +} + +/** + * Atomically set a bit in memory (like test_and_set_bit but discards result). + */ +static __inline__ void set_bit(int nr, volatile unsigned long *addr) +{ + test_and_set_bit(nr, addr); +} + +/** + * Atomically clear a bit in memory (like test_and_clear_bit but discards result). + */ +static __inline__ void clear_bit(int nr, volatile unsigned long *addr) +{ + test_and_clear_bit(nr, addr); +} + +/** + * __ffs - find first (lowest) set bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long __ffs(unsigned long word) +{ + int clz; + + /* xxxxx10000 = word + * xxxxx01111 = word - 1 + * 0000011111 = word ^ (word - 1) + * 4 = 31 - clz(word ^ (word - 1)) + */ + + __asm__ ( + "sub r0, %[word], #1\n" + "eor r0, r0, %[word]\n" + "clz %[clz], r0\n": + /* Outputs: */ + [clz] "=r"(clz): + /* Inputs: */ + [word] "r"(word): + /* Clobbers: */ + "r0"); + + return 31 - clz; +} + +#else /* ifdef __arm__ */ +#error "Unsupported architecture" +#endif +#endif /* ifdef __INSIDE_MINIOS */ + +/********************* common arm32 and arm64 ****************************/ + +/* If *ptr == old, then store new there (and return new). + * Otherwise, return the old value. + * Atomic. */ +#define synch_cmpxchg(ptr, old, new) \ +({ __typeof__(*ptr) stored = old; \ + __atomic_compare_exchange_n(ptr, &stored, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? new : old; \ +}) + +/* As test_and_clear_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ int synch_test_and_clear_bit(int nr, volatile void *addr) +{ + uint8_t *byte = ((uint8_t *)addr) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As test_and_set_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ int synch_test_and_set_bit(int nr, volatile void *base) +{ + uint8_t *byte = ((uint8_t *)base) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_or(byte, bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As set_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ void synch_set_bit(int nr, volatile void *addr) +{ + synch_test_and_set_bit(nr, addr); +} + +/* As clear_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ void synch_clear_bit(int nr, volatile void *addr) +{ + synch_test_and_clear_bit(nr, addr); +} + +/* As test_bit, but with a following memory barrier. 
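+   Note that barrier() is only a compiler barrier here, not a hardware
+   fence.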
*/ +static __inline__ int synch_test_bit(int nr, volatile void *addr) +{ + int result; + result = test_bit(nr, addr); + barrier(); + return result; +} + +#endif /* not assembly */ + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/arm/traps.h xen-4.9.2/extras/mini-os/include/arm/traps.h --- xen-4.9.0/extras/mini-os/include/arm/traps.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/arm/traps.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,20 @@ +#ifndef _TRAPS_H_ +#define _TRAPS_H_ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; +}; + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/asm_macros.h xen-4.9.2/extras/mini-os/include/asm_macros.h --- xen-4.9.0/extras/mini-os/include/asm_macros.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/asm_macros.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,40 @@ +/* + * Macros for assembly files. + */ + +#ifndef _ASM_MACRO_H_ +#define _ASM_MACRO_H_ + +#if defined(__i386__) || defined(__x86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include +#endif + +#ifdef __ASSEMBLY__ + +#define ELFNOTE(name, type, desc) \ + .pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type /* type */ ; \ +1:.asciz #name /* name */ ; \ +2:.align 4 ; \ +3:desc /* desc */ ; \ +4:.align 4 ; \ + .popsection + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_MACRO_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/balloon.h xen-4.9.2/extras/mini-os/include/balloon.h --- xen-4.9.0/extras/mini-os/include/balloon.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/balloon.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,55 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _BALLOON_H_ +#define _BALLOON_H_ + +#ifdef CONFIG_BALLOON + +/* + * Always keep some pages free for allocations while ballooning or + * interrupts disabled. 
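+ * BALLOON_EMERGENCY_PAGES below sets the size of this reserve.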
+ */ +#define BALLOON_EMERGENCY_PAGES 64 + +extern unsigned long nr_max_pages; +extern unsigned long nr_mem_pages; + +void get_max_pages(void); +int balloon_up(unsigned long n_pages); + +void mm_alloc_bitmap_remap(void); +void arch_pfn_add(unsigned long pfn, unsigned long mfn); +int chk_free_pages(unsigned long needed); + +#else /* CONFIG_BALLOON */ + +static inline void get_max_pages(void) { } +static inline void mm_alloc_bitmap_remap(void) { } +static inline int chk_free_pages(unsigned long needed) +{ + return needed <= nr_free_pages; +} + +#endif /* CONFIG_BALLOON */ +#endif /* _BALLOON_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/blkfront.h xen-4.9.2/extras/mini-os/include/blkfront.h --- xen-4.9.0/extras/mini-os/include/blkfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/blkfront.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,54 @@ +#include +#include +#include +struct blkfront_dev; +struct blkfront_aiocb +{ + struct blkfront_dev *aio_dev; + uint8_t *aio_buf; + size_t aio_nbytes; + off_t aio_offset; + size_t total_bytes; + uint8_t is_write; + void *data; + + grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int n; + + void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret); +}; +struct blkfront_info +{ + uint64_t sectors; + unsigned sector_size; + int mode; + int info; + int barrier; + int flush; +}; +struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info); +#ifdef HAVE_LIBC +#include +/* POSIX IO functions: + * use blkfront_open() to get a file descriptor to the block device + * Don't use the other blkfront posix functions here directly, instead use + * read(), write(), lseek() and fstat() on the file descriptor + */ +int blkfront_open(struct blkfront_dev *dev); +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write); +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1) +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0) +int blkfront_posix_fstat(int fd, struct stat* buf); +#endif +void blkfront_aio(struct blkfront_aiocb *aiocbp, int write); +#define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0) +#define blkfront_aio_write(aiocbp) blkfront_aio(aiocbp, 1) +void blkfront_io(struct blkfront_aiocb *aiocbp, int write); +#define blkfront_read(aiocbp) blkfront_io(aiocbp, 0) +#define blkfront_write(aiocbp) blkfront_io(aiocbp, 1) +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op); +int blkfront_aio_poll(struct blkfront_dev *dev); +void blkfront_sync(struct blkfront_dev *dev); +void shutdown_blkfront(struct blkfront_dev *dev); + +extern struct wait_queue_head blkfront_queue; diff -Nru xen-4.9.0/extras/mini-os/include/byteorder.h xen-4.9.2/extras/mini-os/include/byteorder.h --- xen-4.9.0/extras/mini-os/include/byteorder.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/byteorder.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,36 @@ +#ifndef MINIOS_BYTEORDER_H +#define MINIOS_BYTEORDER_H + +#include +#include + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define be16_to_cpu(v) bswap_16(v) +#define be32_to_cpu(v) bswap_32(v) +#define be64_to_cpu(v) bswap_64(v) + +#define le16_to_cpu(v) (v) +#define le32_to_cpu(v) (v) +#define le64_to_cpu(v) (v) + +#else /*__BIG_ENDIAN*/ +#define be16_to_cpu(v) (v) +#define be32_to_cpu(v) (v) +#define be64_to_cpu(v) (v) + +#define le16_to_cpu(v) bswap_16(v) +#define le32_to_cpu(v) bswap_32(v) +#define le64_to_cpu(v) bswap_64(v) + +#endif + +#define 
cpu_to_be16(v) be16_to_cpu(v) +#define cpu_to_be32(v) be32_to_cpu(v) +#define cpu_to_be64(v) be64_to_cpu(v) + +#define cpu_to_le16(v) le16_to_cpu(v) +#define cpu_to_le32(v) le32_to_cpu(v) +#define cpu_to_le64(v) le64_to_cpu(v) + + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/byteswap.h xen-4.9.2/extras/mini-os/include/byteswap.h --- xen-4.9.0/extras/mini-os/include/byteswap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/byteswap.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,39 @@ +#ifndef _BYTESWAP_H_ +#define _BYTESWAP_H_ + +/* Unfortunately not provided by newlib. */ + +#include + +#define bswap_16(x) ((uint16_t)( \ + (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) | \ + (((uint16_t)(x) & (uint16_t)0xff00U) >> 8))) + +/* Use gcc optimized versions if they exist */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#define bswap_32(v) __builtin_bswap32(v) +#define bswap_64(v) __builtin_bswap64(v) +#else + +#define bswap_32(x) ((uint32_t)( \ + (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24))) + +#define bswap_64(x) ((uint64_t)( \ + (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56))) + +#endif + + + + +#endif /* _BYTESWAP_H */ diff -Nru xen-4.9.0/extras/mini-os/include/compiler.h xen-4.9.2/extras/mini-os/include/compiler.h --- xen-4.9.0/extras/mini-os/include/compiler.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/compiler.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef __MINIOS_COMPILER_H_ +#define __MINIOS_COMPILER_H_ + +#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 +#define __builtin_expect(x, expected_value) (x) +#endif +#define unlikely(x) __builtin_expect(!!(x),0) +#define likely(x) __builtin_expect(!!(x),1) +#define __packed __attribute__((__packed__)) + +#endif /* __MINIOS_COMPILER_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/console.h xen-4.9.2/extras/mini-os/include/console.h --- xen-4.9.0/extras/mini-os/include/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/console.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,90 @@ +/* + **************************************************************************** + * (C) 2006 - Grzegorz Milos - Cambridge University + **************************************************************************** + * + * File: console.h + * Author: Grzegorz Milos + * Changes: + * + * Date: Mar 2006 + * + * Environment: Xen Minimal OS + * Description: Console interface. + * + * Handles console I/O. Defines printk. 
+ * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef _LIB_CONSOLE_H_ +#define _LIB_CONSOLE_H_ + +#include +#include +#include +#include +#include +#include +#include + +struct consfront_dev { + domid_t dom; + + struct xencons_interface *ring; + grant_ref_t ring_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +extern uint32_t console_evtchn; + +void print(int direct, const char *fmt, va_list args); +void printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +void xprintk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +#define tprintk(_fmt, _args...) printk("[%s] " _fmt, current->name, ##_args) + +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs); +void xencons_tx(void); + +void get_console(void *p); +void init_console(void); +void console_print(struct consfront_dev *dev, char *data, int length); +void fini_console(struct consfront_dev *dev); + +/* Low level functions defined in xencons_ring.c */ +extern struct wait_queue_head console_queue; +struct consfront_dev *xencons_ring_init(void); +struct consfront_dev *init_consfront(char *_nodename); +int xencons_ring_send(struct consfront_dev *dev, const char *data, unsigned len); +int xencons_ring_send_no_notify(struct consfront_dev *dev, const char *data, unsigned len); +int xencons_ring_avail(struct consfront_dev *dev); +int xencons_ring_recv(struct consfront_dev *dev, char *data, unsigned len); +void free_consfront(struct consfront_dev *dev); + +#endif /* _LIB_CONSOLE_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/ctype.h xen-4.9.2/extras/mini-os/include/ctype.h --- xen-4.9.0/extras/mini-os/include/ctype.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/ctype.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,60 @@ +#ifndef _CTYPE_H +#define _CTYPE_H + +#ifdef HAVE_LIBC +#include_next +#else +/* + * NOTE! This ctype does not handle EOF like the standard C + * library is required to. 
+ */ + +#define _U 0x01 /* upper */ +#define _L 0x02 /* lower */ +#define _D 0x04 /* digit */ +#define _C 0x08 /* cntrl */ +#define _P 0x10 /* punct */ +#define _S 0x20 /* white space (space/lf/tab) */ +#define _X 0x40 /* hex digit */ +#define _SP 0x80 /* hard space (0x20) */ + + +extern unsigned char _ctype[]; + +#define __ismask(x) (_ctype[(int)(unsigned char)(x)]) + +#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) +#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) +#define iscntrl(c) ((__ismask(c)&(_C)) != 0) +#define isdigit(c) ((__ismask(c)&(_D)) != 0) +#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) +#define islower(c) ((__ismask(c)&(_L)) != 0) +#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) +#define ispunct(c) ((__ismask(c)&(_P)) != 0) +#define isspace(c) ((__ismask(c)&(_S)) != 0) +#define isupper(c) ((__ismask(c)&(_U)) != 0) +#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define toascii(c) (((unsigned char)(c))&0x7f) + +static inline unsigned char __tolower(unsigned char c) +{ + if (isupper(c)) + c -= 'A'-'a'; + return c; +} + +static inline unsigned char __toupper(unsigned char c) +{ + if (islower(c)) + c -= 'a'-'A'; + return c; +} + +#define tolower(c) __tolower(c) +#define toupper(c) __toupper(c) + +#endif + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/e820.h xen-4.9.2/extras/mini-os/include/e820.h --- xen-4.9.0/extras/mini-os/include/e820.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/e820.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,48 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __E820_HEADER +#define __E820_HEADER + +/* PC BIOS standard E820 types and structure. */ +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 +#define E820_PMEM 7 +#define E820_TYPES 8 + +struct __packed e820entry { + uint64_t addr; + uint64_t size; + uint32_t type; +}; + +/* Maximum number of entries. 
*/ +#define E820_MAX 128 + +extern struct e820entry e820_map[]; +extern unsigned e820_entries; + +#endif /*__E820_HEADER*/ diff -Nru xen-4.9.0/extras/mini-os/include/endian.h xen-4.9.2/extras/mini-os/include/endian.h --- xen-4.9.0/extras/mini-os/include/endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/endian.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,19 @@ +#ifndef _ENDIAN_H_ +#define _ENDIAN_H_ + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __PDP_ENDIAN 3412 + +#define ARCH_ENDIAN_H +/* This will define __BYTE_ORDER for the current arch */ +#include +#undef ARCH_ENDIAN_H + +#include + +#define BYTE_ORDER __BYTE_ORDER +#define BIG_ENDIAN __BIG_ENDIAN +#define LITTLE_ENDIAN __LITTLE_ENDIAN + +#endif /* endian.h */ diff -Nru xen-4.9.0/extras/mini-os/include/err.h xen-4.9.2/extras/mini-os/include/err.h --- xen-4.9.0/extras/mini-os/include/err.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/err.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,31 @@ +#ifndef _ERR_H +#define _ERR_H + +#include + +/* + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a dentry + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + */ +#define IS_ERR_VALUE(x) ((x) > (unsigned long)-1000L) + +static inline void *ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +#endif /* _LINUX_ERR_H */ diff -Nru xen-4.9.0/extras/mini-os/include/errno-base.h xen-4.9.2/extras/mini-os/include/errno-base.h --- xen-4.9.0/extras/mini-os/include/errno-base.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/errno-base.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,39 @@ +#ifndef _ERRNO_BASE_H +#define _ERRNO_BASE_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ + +#endif diff -Nru 
xen-4.9.0/extras/mini-os/include/errno.h xen-4.9.2/extras/mini-os/include/errno.h --- xen-4.9.0/extras/mini-os/include/errno.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/errno.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,122 @@ +#ifndef _ERRNO_H +#define _ERRNO_H + +#include + +typedef int error_t; + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define ENOTSUP EOPNOTSUPP +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by 
protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + + +#define EFTYPE 132 /* Inappropriate file type or format */ + +#ifdef HAVE_LIBC +#include +extern int errno; +#define ERRNO +#define errno (get_current()->reent._errno) +#endif + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/events.h xen-4.9.2/extras/mini-os/include/events.h --- xen-4.9.0/extras/mini-os/include/events.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/events.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,59 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Reseach Cambridge + **************************************************************************** + * + * File: events.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with events on the event channels + * + **************************************************************************** + */ + +#ifndef _EVENTS_H_ +#define _EVENTS_H_ + +#include +#include + +typedef void (*evtchn_handler_t)(evtchn_port_t, struct pt_regs *, void *); + +/* prototypes */ +void arch_init_events(void); + +/* Called by fini_events to close any ports opened by arch-specific code. 
*/ +void arch_unbind_ports(void); + +void arch_fini_events(void); + +int do_event(evtchn_port_t port, struct pt_regs *regs); +evtchn_port_t bind_virq(uint32_t virq, evtchn_handler_t handler, void *data); +evtchn_port_t bind_pirq(uint32_t pirq, int will_share, evtchn_handler_t handler, void *data); +evtchn_port_t bind_evtchn(evtchn_port_t port, evtchn_handler_t handler, + void *data); +void unbind_evtchn(evtchn_port_t port); +void init_events(void); +int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler, + void *data, evtchn_port_t *port); +int evtchn_bind_interdomain(domid_t pal, evtchn_port_t remote_port, + evtchn_handler_t handler, void *data, + evtchn_port_t *local_port); +int evtchn_get_peercontext(evtchn_port_t local_port, char *ctx, int size); +void unbind_all_ports(void); + +static inline int notify_remote_via_evtchn(evtchn_port_t port) +{ + evtchn_send_t op; + op.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op); +} + +void fini_events(void); + +#endif /* _EVENTS_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/fbfront.h xen-4.9.2/extras/mini-os/include/fbfront.h --- xen-4.9.0/extras/mini-os/include/fbfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/fbfront.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,46 @@ +#include +#include +#include +#include + +/* from */ +#ifndef BTN_LEFT +#define BTN_LEFT 0x110 +#endif +#ifndef BTN_RIGHT +#define BTN_RIGHT 0x111 +#endif +#ifndef BTN_MIDDLE +#define BTN_MIDDLE 0x112 +#endif +#ifndef KEY_Q +#define KEY_Q 16 +#endif +#ifndef KEY_MAX +#define KEY_MAX 0x1ff +#endif + + +struct kbdfront_dev; +struct kbdfront_dev *init_kbdfront(char *nodename, int abs_pointer); +#ifdef HAVE_LIBC +int kbdfront_open(struct kbdfront_dev *dev); +#endif + +int kbdfront_receive(struct kbdfront_dev *dev, union xenkbd_in_event *buf, int n); +extern struct wait_queue_head kbdfront_queue; + +void shutdown_kbdfront(struct kbdfront_dev *dev); + + +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n); +#ifdef HAVE_LIBC +int fbfront_open(struct fbfront_dev *dev); +#endif + +int fbfront_receive(struct fbfront_dev *dev, union xenfb_in_event *buf, int n); +extern struct wait_queue_head fbfront_queue; +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height); +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset); + +void shutdown_fbfront(struct fbfront_dev *dev); diff -Nru xen-4.9.0/extras/mini-os/include/fcntl.h xen-4.9.2/extras/mini-os/include/fcntl.h --- xen-4.9.0/extras/mini-os/include/fcntl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/fcntl.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,99 @@ +#ifndef _I386_FCNTL_H +#define _I386_FCNTL_H + +#ifdef HAVE_LIBC +#include_next +#else + +/* open/fcntl - O_SYNC is only implemented on block devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW
0400000 /* don't follow links */ +#define O_NOATIME 01000000 + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ + +/* +struct flock { + short l_type; + short l_whence; + off_t l_start; + off_t l_len; + pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; +}; + +#define F_LINUX_SPECIFIC_BASE 1024 +*/ + +#endif + +int open(const char *path, int flags, ...) asm("open64"); +int fcntl(int fd, int cmd, ...); + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/gntmap.h xen-4.9.2/extras/mini-os/include/gntmap.h --- xen-4.9.0/extras/mini-os/include/gntmap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/gntmap.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,35 @@ +#ifndef __GNTMAP_H__ +#define __GNTMAP_H__ + +#include + +/* + * Please consider struct gntmap opaque. If instead you choose to disregard + * this message, I insist that you keep an eye out for raptors. 
+ */ +struct gntmap { + int nentries; + struct gntmap_entry *entries; +}; + +int +gntmap_set_max_grants(struct gntmap *map, int count); + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count); + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable); + +void +gntmap_init(struct gntmap *map); + +void +gntmap_fini(struct gntmap *map); + +#endif /* !__GNTMAP_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/gnttab.h xen-4.9.2/extras/mini-os/include/gnttab.h --- xen-4.9.0/extras/mini-os/include/gnttab.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/gnttab.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,17 @@ +#ifndef __GNTTAB_H__ +#define __GNTTAB_H__ + +#include + +void init_gnttab(void); +grant_ref_t gnttab_alloc_and_grant(void **map); +grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, + int readonly); +grant_ref_t gnttab_grant_transfer(domid_t domid, unsigned long pfn); +unsigned long gnttab_end_transfer(grant_ref_t gref); +int gnttab_end_access(grant_ref_t ref); +const char *gnttabop_error(int16_t status); +void fini_gnttab(void); +grant_entry_v1_t *arch_init_gnttab(int nr_grant_frames); + +#endif /* !__GNTTAB_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/hypervisor.h xen-4.9.2/extras/mini-os/include/hypervisor.h --- xen-4.9.0/extras/mini-os/include/hypervisor.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/hypervisor.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,54 @@ +/****************************************************************************** + * hypervisor.h + * + * Hypervisor handling. + * + * + * Copyright (c) 2002, K A Fraser + * Copyright (c) 2005, Grzegorz Milos + * Updates: Aravindh Puthiyaparambil + */ + +#ifndef _HYPERVISOR_H_ +#define _HYPERVISOR_H_ + +#include +#include +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include +#else +#error "Unsupported architecture" +#endif +#include +#include + +/* hypervisor.c */ +#ifdef CONFIG_PARAVIRT +/* + * a placeholder for the start of day information passed up from the hypervisor + */ +union start_info_union +{ + start_info_t start_info; + char padding[512]; +}; +extern union start_info_union start_info_union; +#define start_info (start_info_union.start_info) +#else +int hvm_get_parameter(int idx, uint64_t *value); +int hvm_set_parameter(int idx, uint64_t value); +#endif +shared_info_t *map_shared_info(void *p); +void force_evtchn_callback(void); +void do_hypervisor_callback(struct pt_regs *regs); +void mask_evtchn(uint32_t port); +void unmask_evtchn(uint32_t port); +void clear_evtchn(uint32_t port); + +extern int in_callback; + +#endif /* __HYPERVISOR_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/ioremap.h xen-4.9.2/extras/mini-os/include/ioremap.h --- xen-4.9.0/extras/mini-os/include/ioremap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/ioremap.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2009 Netronome Systems, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _IOREMAP_H_ +#define _IOREMAP_H_ + +void *ioremap(unsigned long phys_addr, unsigned long size); +void *ioremap_nocache(unsigned long phys_addr, unsigned long size); +void iounmap(void *virt_addr, unsigned long size); + +#endif /* _IOREMAP_H_ */ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */ diff -Nru xen-4.9.0/extras/mini-os/include/iorw.h xen-4.9.2/extras/mini-os/include/iorw.h --- xen-4.9.0/extras/mini-os/include/iorw.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/iorw.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef MINIOS_IORW_H +#define MINIOS_IORW_H + +#include + +void iowrite8(volatile void* addr, uint8_t val); +void iowrite16(volatile void* addr, uint16_t val); +void iowrite32(volatile void* addr, uint32_t val); +void iowrite64(volatile void* addr, uint64_t val); + +uint8_t ioread8(volatile void* addr); +uint16_t ioread16(volatile void* addr); +uint32_t ioread32(volatile void* addr); +uint64_t ioread64(volatile void* addr); + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/kernel.h xen-4.9.2/extras/mini-os/include/kernel.h --- xen-4.9.0/extras/mini-os/include/kernel.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/kernel.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,12 @@ +#ifndef _KERNEL_H_ +#define _KERNEL_H_ + +#define MAX_CMDLINE_SIZE 1024 +extern char cmdline[MAX_CMDLINE_SIZE]; + +void start_kernel(void); +void do_exit(void) __attribute__((noreturn)); +void arch_do_exit(void); +void stop_kernel(void); + +#endif /* _KERNEL_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/lib.h xen-4.9.2/extras/mini-os/include/lib.h --- xen-4.9.0/extras/mini-os/include/lib.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/lib.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,247 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, contains some freebsd stuff + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 
16:03:55 rn Exp $ + **************************************************************************** + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _LIB_H_ +#define _LIB_H_ + +#include +#include +#include +#include +#include "gntmap.h" + +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#define BUILD_BUG_ON(cond) ({ _Static_assert(!(cond), "!(" #cond ")"); }) +#define BUILD_BUG_ON_ZERO(cond) \ + sizeof(struct { _Static_assert(!(cond), "!(" #cond ")"); }) +#else +#define BUILD_BUG_ON_ZERO(cond) sizeof(struct { int:-!!(cond); }) +#define BUILD_BUG_ON(cond) ((void)BUILD_BUG_ON_ZERO(cond)) +#endif + +#ifdef HAVE_LIBC +#include +#include +#include +#else +/* string and memory manipulation */ + +/* + * From: + * @(#)libkern.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ +int memcmp(const void *b1, const void *b2, size_t len); + +char *strcat(char * __restrict, const char * __restrict); +int strcmp(const char *, const char *); +char *strcpy(char * __restrict, const char * __restrict); + +char *strdup(const char *__restrict); + +size_t strlen(const char *); + +int strncmp(const char *, const char *, size_t); +char *strncpy(char * __restrict, const char * __restrict, size_t); + +char *strstr(const char *, const char *); + +void *memset(void *, int, size_t); + +char *strchr(const char *p, int ch); +char *strrchr(const char *p, int ch); + +/* From: + * @(#)systm.h 8.7 (Berkeley) 3/29/95 + * $FreeBSD$ + */ +void *memcpy(void *to, const void *from, size_t len); + +size_t strnlen(const char *, size_t); + +unsigned long strtoul(const char *nptr, char **endptr, int base); +int64_t strtoq(const char *nptr, char **endptr, int base); +uint64_t strtouq(const char *nptr, char **endptr, int base); + +extern int sprintf(char * buf, const char * fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int vsprintf(char *buf, const char *, va_list) + __attribute__ ((format (printf, 2, 0))); +extern int snprintf(char * buf, size_t size, const char * fmt, ...) 
+ __attribute__ ((format (printf, 3, 4))); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) + __attribute__ ((format (printf, 3, 0))); +extern int scnprintf(char * buf, size_t size, const char * fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) + __attribute__ ((format (printf, 3, 0))); +extern int sscanf(const char *, const char *, ...) + __attribute__ ((format (scanf, 2, 3))); +extern int vsscanf(const char *, const char *, va_list) + __attribute__ ((format (scanf, 2, 0))); + +#endif + +#include + +#define RAND_MIX 2654435769U + +int rand(void); + +#include + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk("ASSERTION FAILED: %s at %s:%d.\n", \ + # x , \ + __FILE__, \ + __LINE__); \ + BUG(); \ + } \ +} while(0) + +#define BUG_ON(x) ASSERT(!(x)) + +/* Consistency check as much as possible. */ +void sanity_check(void); + +#ifdef HAVE_LIBC +enum fd_type { + FTYPE_NONE = 0, + FTYPE_CONSOLE, + FTYPE_FILE, + FTYPE_XENBUS, + FTYPE_XC, + FTYPE_EVTCHN, + FTYPE_GNTMAP, + FTYPE_SOCKET, + FTYPE_TAP, + FTYPE_BLK, + FTYPE_KBD, + FTYPE_FB, + FTYPE_MEM, + FTYPE_SAVEFILE, + FTYPE_TPMFRONT, + FTYPE_TPM_TIS, +}; + +LIST_HEAD(evtchn_port_list, evtchn_port_info); + +struct evtchn_port_info { + LIST_ENTRY(evtchn_port_info) list; + evtchn_port_t port; + unsigned long pending; + int bound; +}; + +extern struct file { + enum fd_type type; + union { + struct { + /* lwIP fd */ + int fd; + } socket; + struct { + /* FS import fd */ + int fd; + off_t offset; + } file; + struct { + struct evtchn_port_list ports; + } evtchn; + struct gntmap gntmap; + struct { + struct netfront_dev *dev; + } tap; + struct { + struct blkfront_dev *dev; + off_t offset; + } blk; + struct { + struct kbdfront_dev *dev; + } kbd; + struct { + struct fbfront_dev *dev; + } fb; + struct { + struct consfront_dev *dev; + } cons; +#ifdef CONFIG_TPMFRONT + struct { + struct tpmfront_dev *dev; + int respgot; + off_t offset; + } tpmfront; +#endif +#ifdef CONFIG_TPM_TIS + struct { + struct tpm_chip *dev; + int respgot; + off_t offset; + } tpm_tis; +#endif +#ifdef CONFIG_XENBUS + struct { + /* To each xenbus FD is associated a queue of watch events for this + * FD. 
*/ + xenbus_event_queue events; + } xenbus; +#endif + }; + int read; /* maybe available for read */ +} files[]; + +int alloc_fd(enum fd_type type); +void close_all_files(void); +extern struct thread *main_thread; +void sparse(unsigned long data, size_t size); +#endif + +#endif /* _LIB_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/linux/types.h xen-4.9.2/extras/mini-os/include/linux/types.h --- xen-4.9.0/extras/mini-os/include/linux/types.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/linux/types.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,5 @@ +#ifndef _LINUX_TYPES_H_ +#define _LINUX_TYPES_H_ +#include +typedef uint64_t __u64; +#endif /* _LINUX_TYPES_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/lwipopts.h xen-4.9.2/extras/mini-os/include/lwipopts.h --- xen-4.9.0/extras/mini-os/include/lwipopts.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/lwipopts.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,23 @@ +/* + * lwipopts.h + * + * Configuration for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_LWIPOPTS_H__ +#define __LWIP_LWIPOPTS_H__ + +#define SYS_LIGHTWEIGHT_PROT 1 +#define MEM_LIBC_MALLOC 1 +#define LWIP_TIMEVAL_PRIVATE 0 +#define LWIP_DHCP 1 +#define LWIP_COMPAT_SOCKETS 0 +#define LWIP_IGMP 1 +#define LWIP_USE_HEAP_FROM_INTERRUPT 1 +#define MEMP_NUM_SYS_TIMEOUT 10 +#define TCP_SND_BUF 3000 +#define TCP_MSS 1500 + +#endif /* __LWIP_LWIPOPTS_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/minios-external/bsd-COPYRIGHT xen-4.9.2/extras/mini-os/include/minios-external/bsd-COPYRIGHT --- xen-4.9.0/extras/mini-os/include/minios-external/bsd-COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/minios-external/bsd-COPYRIGHT 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,126 @@ +# $FreeBSD$ +# @(#)COPYRIGHT 8.2 (Berkeley) 3/21/94 + +The compilation of software known as FreeBSD is distributed under the +following terms: + +Copyright (c) 1992-2011 The FreeBSD Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +The 4.4BSD and 4.4BSD-Lite software is distributed under the following +terms: + +All of the documentation and software included in the 4.4BSD and 4.4BSD-Lite +Releases is copyrighted by The Regents of the University of California. 
+ +Copyright 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: +This product includes software developed by the University of +California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +The Institute of Electrical and Electronics Engineers and the American +National Standards Committee X3, on Information Processing Systems have +given us permission to reprint portions of their documentation. + +In the following statement, the phrase ``this text'' refers to portions +of the system documentation. + +Portions of this text are reprinted and reproduced in electronic form in +the second BSD Networking Software Release, from IEEE Std 1003.1-1988, IEEE +Standard Portable Operating System Interface for Computer Environments +(POSIX), copyright C 1988 by the Institute of Electrical and Electronics +Engineers, Inc. In the event of any discrepancy between these versions +and the original IEEE Standard, the original IEEE Standard is the referee +document. + +In the following statement, the phrase ``This material'' refers to portions +of the system documentation. + +This material is reproduced with permission from American National +Standards Committee X3, on Information Processing Systems. Computer and +Business Equipment Manufacturers Association (CBEMA), 311 First St., NW, +Suite 500, Washington, DC 20001-2178. The developmental work of +Programming Language C was completed by the X3J11 Technical Committee. + +The views and conclusions contained in the software and documentation are +those of the authors and should not be interpreted as representing official +policies, either expressed or implied, of the Regents of the University +of California. + + +NOTE: The copyright of UC Berkeley's Berkeley Software Distribution ("BSD") +source has been updated. The copyright addendum may be found at +ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change and is +included below. 
+ +July 22, 1999 + +To All Licensees, Distributors of Any Version of BSD: + +As you know, certain of the Berkeley Software Distribution ("BSD") source +code files require that further distributions of products containing all or +portions of the software, acknowledge within their advertising materials +that such products contain software developed by UC Berkeley and its +contributors. + +Specifically, the provision reads: + +" * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors." + +Effective immediately, licensees and distributors are no longer required to +include the acknowledgement within advertising materials. Accordingly, the +foregoing paragraph of those BSD Unix files containing it is hereby deleted +in its entirety. + +William Hoskins +Director, Office of Technology Licensing +University of California, Berkeley diff -Nru xen-4.9.0/extras/mini-os/include/minios-external/bsd-queue.3 xen-4.9.2/extras/mini-os/include/minios-external/bsd-queue.3 --- xen-4.9.0/extras/mini-os/include/minios-external/bsd-queue.3 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/minios-external/bsd-queue.3 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1044 @@ +.\" Copyright (c) 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" @(#)queue.3 8.2 (Berkeley) 1/24/94 +.\" $FreeBSD$ +.\" +.Dd May 13, 2011 +.Dt QUEUE 3 +.Os +.Sh NAME +.Nm SLIST_EMPTY , +.Nm SLIST_ENTRY , +.Nm SLIST_FIRST , +.Nm SLIST_FOREACH , +.Nm SLIST_FOREACH_SAFE , +.Nm SLIST_HEAD , +.Nm SLIST_HEAD_INITIALIZER , +.Nm SLIST_INIT , +.Nm SLIST_INSERT_AFTER , +.Nm SLIST_INSERT_HEAD , +.Nm SLIST_NEXT , +.Nm SLIST_REMOVE_AFTER , +.Nm SLIST_REMOVE_HEAD , +.Nm SLIST_REMOVE , +.Nm SLIST_SWAP , +.Nm STAILQ_CONCAT , +.Nm STAILQ_EMPTY , +.Nm STAILQ_ENTRY , +.Nm STAILQ_FIRST , +.Nm STAILQ_FOREACH , +.Nm STAILQ_FOREACH_SAFE , +.Nm STAILQ_HEAD , +.Nm STAILQ_HEAD_INITIALIZER , +.Nm STAILQ_INIT , +.Nm STAILQ_INSERT_AFTER , +.Nm STAILQ_INSERT_HEAD , +.Nm STAILQ_INSERT_TAIL , +.Nm STAILQ_LAST , +.Nm STAILQ_NEXT , +.Nm STAILQ_REMOVE_AFTER , +.Nm STAILQ_REMOVE_HEAD , +.Nm STAILQ_REMOVE , +.Nm STAILQ_SWAP , +.Nm LIST_EMPTY , +.Nm LIST_ENTRY , +.Nm LIST_FIRST , +.Nm LIST_FOREACH , +.Nm LIST_FOREACH_SAFE , +.Nm LIST_HEAD , +.Nm LIST_HEAD_INITIALIZER , +.Nm LIST_INIT , +.Nm LIST_INSERT_AFTER , +.Nm LIST_INSERT_BEFORE , +.Nm LIST_INSERT_HEAD , +.Nm LIST_NEXT , +.Nm LIST_REMOVE , +.Nm LIST_SWAP , +.Nm TAILQ_CONCAT , +.Nm TAILQ_EMPTY , +.Nm TAILQ_ENTRY , +.Nm TAILQ_FIRST , +.Nm TAILQ_FOREACH , +.Nm TAILQ_FOREACH_SAFE , +.Nm TAILQ_FOREACH_REVERSE , +.Nm TAILQ_FOREACH_REVERSE_SAFE , +.Nm TAILQ_HEAD , +.Nm TAILQ_HEAD_INITIALIZER , +.Nm TAILQ_INIT , +.Nm TAILQ_INSERT_AFTER , +.Nm TAILQ_INSERT_BEFORE , +.Nm TAILQ_INSERT_HEAD , +.Nm TAILQ_INSERT_TAIL , +.Nm TAILQ_LAST , +.Nm TAILQ_NEXT , +.Nm TAILQ_PREV , +.Nm TAILQ_REMOVE , +.Nm TAILQ_SWAP +.Nd implementations of singly-linked lists, singly-linked tail queues, +lists and tail queues +.Sh SYNOPSIS +.In sys/queue.h +.\" +.Fn SLIST_EMPTY "SLIST_HEAD *head" +.Fn SLIST_ENTRY "TYPE" +.Fn SLIST_FIRST "SLIST_HEAD *head" +.Fn SLIST_FOREACH "TYPE *var" "SLIST_HEAD *head" "SLIST_ENTRY NAME" +.Fn SLIST_FOREACH_SAFE "TYPE *var" "SLIST_HEAD *head" "SLIST_ENTRY NAME" "TYPE *temp_var" +.Fn SLIST_HEAD "HEADNAME" "TYPE" +.Fn SLIST_HEAD_INITIALIZER "SLIST_HEAD head" +.Fn SLIST_INIT "SLIST_HEAD *head" +.Fn SLIST_INSERT_AFTER "TYPE *listelm" "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_INSERT_HEAD "SLIST_HEAD *head" "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_NEXT "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_REMOVE_AFTER "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_REMOVE_HEAD "SLIST_HEAD *head" "SLIST_ENTRY NAME" +.Fn SLIST_REMOVE "SLIST_HEAD *head" "TYPE *elm" "TYPE" "SLIST_ENTRY NAME" +.Fn SLIST_SWAP "SLIST_HEAD *head1" "SLIST_HEAD *head2" "SLIST_ENTRY NAME" +.\" +.Fn STAILQ_CONCAT "STAILQ_HEAD *head1" "STAILQ_HEAD *head2" +.Fn STAILQ_EMPTY "STAILQ_HEAD *head" +.Fn STAILQ_ENTRY "TYPE" +.Fn STAILQ_FIRST "STAILQ_HEAD *head" +.Fn STAILQ_FOREACH "TYPE *var" "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" +.Fn STAILQ_FOREACH_SAFE "TYPE *var" "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn STAILQ_HEAD "HEADNAME" "TYPE" +.Fn STAILQ_HEAD_INITIALIZER "STAILQ_HEAD head" +.Fn STAILQ_INIT "STAILQ_HEAD *head" +.Fn STAILQ_INSERT_AFTER "STAILQ_HEAD *head" "TYPE *listelm" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_INSERT_HEAD "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_INSERT_TAIL "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_LAST "STAILQ_HEAD *head" "TYPE" "STAILQ_ENTRY NAME" +.Fn STAILQ_NEXT "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE_AFTER "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE_HEAD "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE "STAILQ_HEAD *head" "TYPE *elm" "TYPE" 
"STAILQ_ENTRY NAME" +.Fn STAILQ_SWAP "STAILQ_HEAD *head1" "STAILQ_HEAD *head2" "STAILQ_ENTRY NAME" +.\" +.Fn LIST_EMPTY "LIST_HEAD *head" +.Fn LIST_ENTRY "TYPE" +.Fn LIST_FIRST "LIST_HEAD *head" +.Fn LIST_FOREACH "TYPE *var" "LIST_HEAD *head" "LIST_ENTRY NAME" +.Fn LIST_FOREACH_SAFE "TYPE *var" "LIST_HEAD *head" "LIST_ENTRY NAME" "TYPE *temp_var" +.Fn LIST_HEAD "HEADNAME" "TYPE" +.Fn LIST_HEAD_INITIALIZER "LIST_HEAD head" +.Fn LIST_INIT "LIST_HEAD *head" +.Fn LIST_INSERT_AFTER "TYPE *listelm" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_INSERT_BEFORE "TYPE *listelm" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_INSERT_HEAD "LIST_HEAD *head" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_NEXT "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_REMOVE "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_SWAP "LIST_HEAD *head1" "LIST_HEAD *head2" "TYPE" "LIST_ENTRY NAME" +.\" +.Fn TAILQ_CONCAT "TAILQ_HEAD *head1" "TAILQ_HEAD *head2" "TAILQ_ENTRY NAME" +.Fn TAILQ_EMPTY "TAILQ_HEAD *head" +.Fn TAILQ_ENTRY "TYPE" +.Fn TAILQ_FIRST "TAILQ_HEAD *head" +.Fn TAILQ_FOREACH "TYPE *var" "TAILQ_HEAD *head" "TAILQ_ENTRY NAME" +.Fn TAILQ_FOREACH_SAFE "TYPE *var" "TAILQ_HEAD *head" "TAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn TAILQ_FOREACH_REVERSE "TYPE *var" "TAILQ_HEAD *head" "HEADNAME" "TAILQ_ENTRY NAME" +.Fn TAILQ_FOREACH_REVERSE_SAFE "TYPE *var" "TAILQ_HEAD *head" "HEADNAME" "TAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn TAILQ_HEAD "HEADNAME" "TYPE" +.Fn TAILQ_HEAD_INITIALIZER "TAILQ_HEAD head" +.Fn TAILQ_INIT "TAILQ_HEAD *head" +.Fn TAILQ_INSERT_AFTER "TAILQ_HEAD *head" "TYPE *listelm" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_BEFORE "TYPE *listelm" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_HEAD "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_TAIL "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_LAST "TAILQ_HEAD *head" "HEADNAME" +.Fn TAILQ_NEXT "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_PREV "TYPE *elm" "HEADNAME" "TAILQ_ENTRY NAME" +.Fn TAILQ_REMOVE "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_SWAP "TAILQ_HEAD *head1" "TAILQ_HEAD *head2" "TYPE" "TAILQ_ENTRY NAME" +.\" +.Sh DESCRIPTION +These macros define and operate on four types of data structures: +singly-linked lists, singly-linked tail queues, lists, and tail queues. +All four structures support the following functionality: +.Bl -enum -compact -offset indent +.It +Insertion of a new entry at the head of the list. +.It +Insertion of a new entry after any element in the list. +.It +O(1) removal of an entry from the head of the list. +.It +Forward traversal through the list. +.It +Swawpping the contents of two lists. +.El +.Pp +Singly-linked lists are the simplest of the four data structures +and support only the above functionality. +Singly-linked lists are ideal for applications with large datasets +and few or no removals, +or for implementing a LIFO queue. +Singly-linked lists add the following functionality: +.Bl -enum -compact -offset indent +.It +O(n) removal of any entry in the list. +.El +.Pp +Singly-linked tail queues add the following functionality: +.Bl -enum -compact -offset indent +.It +Entries can be added at the end of a list. +.It +O(n) removal of any entry in the list. +.It +They may be concatenated. +.El +However: +.Bl -enum -compact -offset indent +.It +All list insertions must specify the head of the list. +.It +Each head entry requires two pointers rather than one. +.It +Code size is about 15% greater and operations run about 20% slower +than singly-linked lists. 
+.El +.Pp +Singly-linked tailqs are ideal for applications with large datasets and +few or no removals, +or for implementing a FIFO queue. +.Pp +All doubly linked types of data structures (lists and tail queues) +additionally allow: +.Bl -enum -compact -offset indent +.It +Insertion of a new entry before any element in the list. +.It +O(1) removal of any entry in the list. +.El +However: +.Bl -enum -compact -offset indent +.It +Each element requires two pointers rather than one. +.It +Code size and execution time of operations (except for removal) is about +twice that of the singly-linked data-structures. +.El +.Pp +Linked lists are the simplest of the doubly linked data structures and support +only the above functionality over singly-linked lists. +.Pp +Tail queues add the following functionality: +.Bl -enum -compact -offset indent +.It +Entries can be added at the end of a list. +.It +They may be traversed backwards, from tail to head. +.It +They may be concatenated. +.El +However: +.Bl -enum -compact -offset indent +.It +All list insertions and removals must specify the head of the list. +.It +Each head entry requires two pointers rather than one. +.It +Code size is about 15% greater and operations run about 20% slower +than singly-linked lists. +.El +.Pp +In the macro definitions, +.Fa TYPE +is the name of a user defined structure, +that must contain a field of type +.Li SLIST_ENTRY , +.Li STAILQ_ENTRY , +.Li LIST_ENTRY , +or +.Li TAILQ_ENTRY , +named +.Fa NAME . +The argument +.Fa HEADNAME +is the name of a user defined structure that must be declared +using the macros +.Li SLIST_HEAD , +.Li STAILQ_HEAD , +.Li LIST_HEAD , +or +.Li TAILQ_HEAD . +See the examples below for further explanation of how these +macros are used. +.Sh SINGLY-LINKED LISTS +A singly-linked list is headed by a structure defined by the +.Nm SLIST_HEAD +macro. +This structure contains a single pointer to the first element +on the list. +The elements are singly linked for minimum space and pointer manipulation +overhead at the expense of O(n) removal for arbitrary elements. +New elements can be added to the list after an existing element or +at the head of the list. +An +.Fa SLIST_HEAD +structure is declared as follows: +.Bd -literal -offset indent +SLIST_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Fa HEADNAME +is the name of the structure to be defined, and +.Fa TYPE +is the type of the elements to be linked into the list. +A pointer to the head of the list can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm SLIST_HEAD_INITIALIZER +evaluates to an initializer for the list +.Fa head . +.Pp +The macro +.Nm SLIST_EMPTY +evaluates to true if there are no elements in the list. +.Pp +The macro +.Nm SLIST_ENTRY +declares a structure that connects the elements in +the list. +.Pp +The macro +.Nm SLIST_FIRST +returns the first element in the list or NULL if the list is empty. +.Pp +The macro +.Nm SLIST_FOREACH +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in +turn to +.Fa var . +.Pp +The macro +.Nm SLIST_FOREACH_SAFE +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in +turn to +.Fa var . +However, unlike +.Fn SLIST_FOREACH +here it is permitted to both remove +.Fa var +as well as free it from within the loop safely without interfering with the +traversal. 
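The practical difference is worth spelling out: with plain SLIST_FOREACH, removing and freeing the current element leaves the loop dereferencing freed memory when it steps to the next node. Below is a minimal standalone C sketch of the safe pattern, assuming a BSD-style sys/queue.h that provides the _SAFE iterators (such as the bsd-queue.h header this page accompanies); the struct and function names are illustrative only, not taken from the patch.

    #include <stdlib.h>
    #include <sys/queue.h>  /* any BSD-style queue.h with the _SAFE iterators */

    struct entry {
        int value;
        SLIST_ENTRY(entry) entries;   /* required in-element linkage field */
    };

    SLIST_HEAD(slisthead, entry);

    /* Remove and free every odd-valued element in a single pass. */
    static void drop_odd(struct slisthead *head)
    {
        struct entry *np, *np_temp;

        /* Plain SLIST_FOREACH would read SLIST_NEXT(np, entries) after
         * free(np); the _SAFE variant caches the successor in np_temp
         * first, so removing and freeing np inside the loop is legal. */
        SLIST_FOREACH_SAFE(np, head, entries, np_temp) {
            if (np->value & 1) {
                SLIST_REMOVE(head, np, entry, entries);
                free(np);
            }
        }
    }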
+.Pp +The macro +.Nm SLIST_INIT +initializes the list referenced by +.Fa head . +.Pp +The macro +.Nm SLIST_INSERT_HEAD +inserts the new element +.Fa elm +at the head of the list. +.Pp +The macro +.Nm SLIST_INSERT_AFTER +inserts the new element +.Fa elm +after the element +.Fa listelm . +.Pp +The macro +.Nm SLIST_NEXT +returns the next element in the list. +.Pp +The macro +.Nm SLIST_REMOVE_AFTER +removes the element after +.Fa elm +from the list. Unlike +.Fa SLIST_REMOVE , +this macro does not traverse the entire list. +.Pp +The macro +.Nm SLIST_REMOVE_HEAD +removes the element +.Fa elm +from the head of the list. +For optimum efficiency, +elements being removed from the head of the list should explicitly use +this macro instead of the generic +.Fa SLIST_REMOVE +macro. +.Pp +The macro +.Nm SLIST_REMOVE +removes the element +.Fa elm +from the list. +.Pp +The macro +.Nm SLIST_SWAP +swaps the contents of +.Fa head1 +and +.Fa head2 . +.Sh SINGLY-LINKED LIST EXAMPLE +.Bd -literal +SLIST_HEAD(slisthead, entry) head = + SLIST_HEAD_INITIALIZER(head); +struct slisthead *headp; /* Singly-linked List head. */ +struct entry { + ... + SLIST_ENTRY(entry) entries; /* Singly-linked List. */ + ... +} *n1, *n2, *n3, *np; + +SLIST_INIT(&head); /* Initialize the list. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +SLIST_INSERT_HEAD(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +SLIST_INSERT_AFTER(n1, n2, entries); + +SLIST_REMOVE(&head, n2, entry, entries);/* Deletion. */ +free(n2); + +n3 = SLIST_FIRST(&head); +SLIST_REMOVE_HEAD(&head, entries); /* Deletion from the head. */ +free(n3); + /* Forward traversal. */ +SLIST_FOREACH(np, &head, entries) + np-> ... + /* Safe forward traversal. */ +SLIST_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + SLIST_REMOVE(&head, np, entry, entries); + free(np); +} + +while (!SLIST_EMPTY(&head)) { /* List Deletion. */ + n1 = SLIST_FIRST(&head); + SLIST_REMOVE_HEAD(&head, entries); + free(n1); +} +.Ed +.Sh SINGLY-LINKED TAIL QUEUES +A singly-linked tail queue is headed by a structure defined by the +.Nm STAILQ_HEAD +macro. +This structure contains a pair of pointers, +one to the first element in the tail queue and the other to +the last element in the tail queue. +The elements are singly linked for minimum space and pointer +manipulation overhead at the expense of O(n) removal for arbitrary +elements. +New elements can be added to the tail queue after an existing element, +at the head of the tail queue, or at the end of the tail queue. +A +.Fa STAILQ_HEAD +structure is declared as follows: +.Bd -literal -offset indent +STAILQ_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Li HEADNAME +is the name of the structure to be defined, and +.Li TYPE +is the type of the elements to be linked into the tail queue. +A pointer to the head of the tail queue can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm STAILQ_HEAD_INITIALIZER +evaluates to an initializer for the tail queue +.Fa head . +.Pp +The macro +.Nm STAILQ_CONCAT +concatenates the tail queue headed by +.Fa head2 +onto the end of the one headed by +.Fa head1 +removing all entries from the former. +.Pp +The macro +.Nm STAILQ_EMPTY +evaluates to true if there are no items on the tail queue. +.Pp +The macro +.Nm STAILQ_ENTRY +declares a structure that connects the elements in +the tail queue. 
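+.Pp
+(Editor's aside, not part of the upstream manual page: a minimal sketch of
+STAILQ_CONCAT, described above; the queue names q1 and q2 are hypothetical
+and the element type is the struct entry used in the examples.)
+.Bd -literal -offset indent
+STAILQ_HEAD(stailhead, entry) q1 = STAILQ_HEAD_INITIALIZER(q1);
+struct stailhead q2 = STAILQ_HEAD_INITIALIZER(q2);
+/* ... insert elements into q1 and q2 ... */
+STAILQ_CONCAT(&q1, &q2);        /* O(1): q2's elements move to the
+                                   end of q1; q2 is left empty. */
+.Ed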
+.Pp
+The macro
+.Nm STAILQ_FIRST
+returns the first item on the tail queue or NULL if the tail queue
+is empty.
+.Pp
+The macro
+.Nm STAILQ_FOREACH
+traverses the tail queue referenced by
+.Fa head
+in the forward direction, assigning each element
+in turn to
+.Fa var .
+.Pp
+The macro
+.Nm STAILQ_FOREACH_SAFE
+traverses the tail queue referenced by
+.Fa head
+in the forward direction, assigning each element
+in turn to
+.Fa var .
+However, unlike
+.Fn STAILQ_FOREACH
+here it is permitted to both remove
+.Fa var
+as well as free it from within the loop safely without interfering with the
+traversal.
+.Pp
+The macro
+.Nm STAILQ_INIT
+initializes the tail queue referenced by
+.Fa head .
+.Pp
+The macro
+.Nm STAILQ_INSERT_HEAD
+inserts the new element
+.Fa elm
+at the head of the tail queue.
+.Pp
+The macro
+.Nm STAILQ_INSERT_TAIL
+inserts the new element
+.Fa elm
+at the end of the tail queue.
+.Pp
+The macro
+.Nm STAILQ_INSERT_AFTER
+inserts the new element
+.Fa elm
+after the element
+.Fa listelm .
+.Pp
+The macro
+.Nm STAILQ_LAST
+returns the last item on the tail queue.
+If the tail queue is empty the return value is
+.Dv NULL .
+.Pp
+The macro
+.Nm STAILQ_NEXT
+returns the next item on the tail queue, or NULL if this item is the last.
+.Pp
+The macro
+.Nm STAILQ_REMOVE_AFTER
+removes the element after
+.Fa elm
+from the tail queue. Unlike
+.Fa STAILQ_REMOVE ,
+this macro does not traverse the entire tail queue.
+.Pp
+The macro
+.Nm STAILQ_REMOVE_HEAD
+removes the element at the head of the tail queue.
+For optimum efficiency,
+elements being removed from the head of the tail queue should
+use this macro explicitly rather than the generic
+.Fa STAILQ_REMOVE
+macro.
+.Pp
+The macro
+.Nm STAILQ_REMOVE
+removes the element
+.Fa elm
+from the tail queue.
+.Pp
+The macro
+.Nm STAILQ_SWAP
+swaps the contents of
+.Fa head1
+and
+.Fa head2 .
+.Sh SINGLY-LINKED TAIL QUEUE EXAMPLE
+.Bd -literal
+STAILQ_HEAD(stailhead, entry) head =
+    STAILQ_HEAD_INITIALIZER(head);
+struct stailhead *headp;                /* Singly-linked tail queue head. */
+struct entry {
+        ...
+        STAILQ_ENTRY(entry) entries;    /* Tail queue. */
+        ...
+} *n1, *n2, *n3, *np;
+
+STAILQ_INIT(&head);                     /* Initialize the queue. */
+
+n1 = malloc(sizeof(struct entry));      /* Insert at the head. */
+STAILQ_INSERT_HEAD(&head, n1, entries);
+
+n1 = malloc(sizeof(struct entry));      /* Insert at the tail. */
+STAILQ_INSERT_TAIL(&head, n1, entries);
+
+n2 = malloc(sizeof(struct entry));      /* Insert after. */
+STAILQ_INSERT_AFTER(&head, n1, n2, entries);
+                                        /* Deletion. */
+STAILQ_REMOVE(&head, n2, entry, entries);
+free(n2);
+                                        /* Deletion from the head. */
+n3 = STAILQ_FIRST(&head);
+STAILQ_REMOVE_HEAD(&head, entries);
+free(n3);
+                                        /* Forward traversal. */
+STAILQ_FOREACH(np, &head, entries)
+        np-> ...
+                                        /* Safe forward traversal. */
+STAILQ_FOREACH_SAFE(np, &head, entries, np_temp) {
+        np->do_stuff();
+        ...
+        STAILQ_REMOVE(&head, np, entry, entries);
+        free(np);
+}
+                                        /* TailQ Deletion. */
+while (!STAILQ_EMPTY(&head)) {
+        n1 = STAILQ_FIRST(&head);
+        STAILQ_REMOVE_HEAD(&head, entries);
+        free(n1);
+}
+                                        /* Faster TailQ Deletion. */
+n1 = STAILQ_FIRST(&head);
+while (n1 != NULL) {
+        n2 = STAILQ_NEXT(n1, entries);
+        free(n1);
+        n1 = n2;
+}
+STAILQ_INIT(&head);
+.Ed
+.Sh LISTS
+A list is headed by a structure defined by the
+.Nm LIST_HEAD
+macro.
+This structure contains a single pointer to the first element
+on the list.
+The elements are doubly linked so that an arbitrary element can be
+removed without traversing the list.
+New elements can be added to the list after an existing element, +before an existing element, or at the head of the list. +A +.Fa LIST_HEAD +structure is declared as follows: +.Bd -literal -offset indent +LIST_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Fa HEADNAME +is the name of the structure to be defined, and +.Fa TYPE +is the type of the elements to be linked into the list. +A pointer to the head of the list can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm LIST_HEAD_INITIALIZER +evaluates to an initializer for the list +.Fa head . +.Pp +The macro +.Nm LIST_EMPTY +evaluates to true if there are no elements in the list. +.Pp +The macro +.Nm LIST_ENTRY +declares a structure that connects the elements in +the list. +.Pp +The macro +.Nm LIST_FIRST +returns the first element in the list or NULL if the list +is empty. +.Pp +The macro +.Nm LIST_FOREACH +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in turn to +.Fa var . +.Pp +The macro +.Nm LIST_FOREACH_SAFE +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in turn to +.Fa var . +However, unlike +.Fn LIST_FOREACH +here it is permitted to both remove +.Fa var +as well as free it from within the loop safely without interfering with the +traversal. +.Pp +The macro +.Nm LIST_INIT +initializes the list referenced by +.Fa head . +.Pp +The macro +.Nm LIST_INSERT_HEAD +inserts the new element +.Fa elm +at the head of the list. +.Pp +The macro +.Nm LIST_INSERT_AFTER +inserts the new element +.Fa elm +after the element +.Fa listelm . +.Pp +The macro +.Nm LIST_INSERT_BEFORE +inserts the new element +.Fa elm +before the element +.Fa listelm . +.Pp +The macro +.Nm LIST_NEXT +returns the next element in the list, or NULL if this is the last. +.Pp +The macro +.Nm LIST_REMOVE +removes the element +.Fa elm +from the list. +.Pp +The macro +.Nm LIST_SWAP +swaps the contents of +.Fa head1 +and +.Fa head2 . +.Sh LIST EXAMPLE +.Bd -literal +LIST_HEAD(listhead, entry) head = + LIST_HEAD_INITIALIZER(head); +struct listhead *headp; /* List head. */ +struct entry { + ... + LIST_ENTRY(entry) entries; /* List. */ + ... +} *n1, *n2, *n3, *np, *np_temp; + +LIST_INIT(&head); /* Initialize the list. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +LIST_INSERT_HEAD(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +LIST_INSERT_AFTER(n1, n2, entries); + +n3 = malloc(sizeof(struct entry)); /* Insert before. */ +LIST_INSERT_BEFORE(n2, n3, entries); + +LIST_REMOVE(n2, entries); /* Deletion. */ +free(n2); + /* Forward traversal. */ +LIST_FOREACH(np, &head, entries) + np-> ... + + /* Safe forward traversal. */ +LIST_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + LIST_REMOVE(np, entries); + free(np); +} + +while (!LIST_EMPTY(&head)) { /* List Deletion. */ + n1 = LIST_FIRST(&head); + LIST_REMOVE(n1, entries); + free(n1); +} + +n1 = LIST_FIRST(&head); /* Faster List Deletion. */ +while (n1 != NULL) { + n2 = LIST_NEXT(n1, entries); + free(n1); + n1 = n2; +} +LIST_INIT(&head); +.Ed +.Sh TAIL QUEUES +A tail queue is headed by a structure defined by the +.Nm TAILQ_HEAD +macro. +This structure contains a pair of pointers, +one to the first element in the tail queue and the other to +the last element in the tail queue. 
+The elements are doubly linked so that an arbitrary element can be
+removed without traversing the tail queue.
+New elements can be added to the tail queue after an existing element,
+before an existing element, at the head of the tail queue,
+or at the end of the tail queue.
+A
+.Fa TAILQ_HEAD
+structure is declared as follows:
+.Bd -literal -offset indent
+TAILQ_HEAD(HEADNAME, TYPE) head;
+.Ed
+.Pp
+where
+.Li HEADNAME
+is the name of the structure to be defined, and
+.Li TYPE
+is the type of the elements to be linked into the tail queue.
+A pointer to the head of the tail queue can later be declared as:
+.Bd -literal -offset indent
+struct HEADNAME *headp;
+.Ed
+.Pp
+(The names
+.Li head
+and
+.Li headp
+are user selectable.)
+.Pp
+The macro
+.Nm TAILQ_HEAD_INITIALIZER
+evaluates to an initializer for the tail queue
+.Fa head .
+.Pp
+The macro
+.Nm TAILQ_CONCAT
+concatenates the tail queue headed by
+.Fa head2
+onto the end of the one headed by
+.Fa head1
+removing all entries from the former.
+.Pp
+The macro
+.Nm TAILQ_EMPTY
+evaluates to true if there are no items on the tail queue.
+.Pp
+The macro
+.Nm TAILQ_ENTRY
+declares a structure that connects the elements in
+the tail queue.
+.Pp
+The macro
+.Nm TAILQ_FIRST
+returns the first item on the tail queue or NULL if the tail queue
+is empty.
+.Pp
+The macro
+.Nm TAILQ_FOREACH
+traverses the tail queue referenced by
+.Fa head
+in the forward direction, assigning each element in turn to
+.Fa var .
+.Fa var
+is set to
+.Dv NULL
+if the loop completes normally, or if there were no elements.
+.Pp
+The macro
+.Nm TAILQ_FOREACH_REVERSE
+traverses the tail queue referenced by
+.Fa head
+in the reverse direction, assigning each element in turn to
+.Fa var .
+.Pp
+The macros
+.Nm TAILQ_FOREACH_SAFE
+and
+.Nm TAILQ_FOREACH_REVERSE_SAFE
+traverse the list referenced by
+.Fa head
+in the forward or reverse direction respectively,
+assigning each element in turn to
+.Fa var .
+However, unlike their unsafe counterparts
+.Nm TAILQ_FOREACH
+and
+.Nm TAILQ_FOREACH_REVERSE ,
+they permit
+.Fa var
+to be both removed and freed from within the loop safely without
+interfering with the traversal.
+.Pp
+The macro
+.Nm TAILQ_INIT
+initializes the tail queue referenced by
+.Fa head .
+.Pp
+The macro
+.Nm TAILQ_INSERT_HEAD
+inserts the new element
+.Fa elm
+at the head of the tail queue.
+.Pp
+The macro
+.Nm TAILQ_INSERT_TAIL
+inserts the new element
+.Fa elm
+at the end of the tail queue.
+.Pp
+The macro
+.Nm TAILQ_INSERT_AFTER
+inserts the new element
+.Fa elm
+after the element
+.Fa listelm .
+.Pp
+The macro
+.Nm TAILQ_INSERT_BEFORE
+inserts the new element
+.Fa elm
+before the element
+.Fa listelm .
+.Pp
+The macro
+.Nm TAILQ_LAST
+returns the last item on the tail queue.
+If the tail queue is empty the return value is
+.Dv NULL .
+.Pp
+The macro
+.Nm TAILQ_NEXT
+returns the next item on the tail queue, or NULL if this item is the last.
+.Pp
+The macro
+.Nm TAILQ_PREV
+returns the previous item on the tail queue, or NULL if this item
+is the first.
+.Pp
+The macro
+.Nm TAILQ_REMOVE
+removes the element
+.Fa elm
+from the tail queue.
+.Pp
+The macro
+.Nm TAILQ_SWAP
+swaps the contents of
+.Fa head1
+and
+.Fa head2 .
+.Sh TAIL QUEUE EXAMPLE
+.Bd -literal
+TAILQ_HEAD(tailhead, entry) head =
+    TAILQ_HEAD_INITIALIZER(head);
+struct tailhead *headp;                 /* Tail queue head. */
+struct entry {
+        ...
+        TAILQ_ENTRY(entry) entries;     /* Tail queue. */
+        ...
+} *n1, *n2, *n3, *np;
+
+TAILQ_INIT(&head);                      /* Initialize the queue.
*/ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +TAILQ_INSERT_HEAD(&head, n1, entries); + +n1 = malloc(sizeof(struct entry)); /* Insert at the tail. */ +TAILQ_INSERT_TAIL(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +TAILQ_INSERT_AFTER(&head, n1, n2, entries); + +n3 = malloc(sizeof(struct entry)); /* Insert before. */ +TAILQ_INSERT_BEFORE(n2, n3, entries); + +TAILQ_REMOVE(&head, n2, entries); /* Deletion. */ +free(n2); + /* Forward traversal. */ +TAILQ_FOREACH(np, &head, entries) + np-> ... + /* Safe forward traversal. */ +TAILQ_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + TAILQ_REMOVE(&head, np, entries); + free(np); +} + /* Reverse traversal. */ +TAILQ_FOREACH_REVERSE(np, &head, tailhead, entries) + np-> ... + /* TailQ Deletion. */ +while (!TAILQ_EMPTY(&head)) { + n1 = TAILQ_FIRST(&head); + TAILQ_REMOVE(&head, n1, entries); + free(n1); +} + /* Faster TailQ Deletion. */ +n1 = TAILQ_FIRST(&head); +while (n1 != NULL) { + n2 = TAILQ_NEXT(n1, entries); + free(n1); + n1 = n2; +} +TAILQ_INIT(&head); +.Ed +.Sh SEE ALSO +.Xr tree 3 +.Sh HISTORY +The +.Nm queue +functions first appeared in +.Bx 4.4 . diff -Nru xen-4.9.0/extras/mini-os/include/minios-external/bsd-sys-queue.h xen-4.9.2/extras/mini-os/include/minios-external/bsd-sys-queue.h --- xen-4.9.0/extras/mini-os/include/minios-external/bsd-sys-queue.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/minios-external/bsd-sys-queue.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,637 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +#include + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. 
The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_AFTER + - + - + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * _SWAP + + + + + * + */ +#ifdef QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) +#define QMD_SAVELINK(name, link) void **name = (void *)&(link) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define QMD_SAVELINK(name, link) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. 
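+ *
+ * (Editor's note, not from the upstream header: SLIST_FOREACH_PREVPTR below
+ * also tracks varp, the address of the pointer through which var was
+ * reached, which allows an O(1) unlink during traversal.  A hedged sketch,
+ * where should_remove() is a hypothetical predicate:
+ *
+ *     struct type *var, **varp;
+ *     SLIST_FOREACH_PREVPTR(var, varp, head, field)
+ *             if (should_remove(var)) {
+ *                     *varp = SLIST_NEXT(var, field);
+ *                     break;
+ *             }
+ *
+ * After the unlink, var is off the list but remains valid to free.)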
+ */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.sle_next); \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_REMOVE_AFTER(curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define SLIST_REMOVE_AFTER(elm, field) do { \ + SLIST_NEXT(elm, field) = \ + SLIST_NEXT(SLIST_NEXT(elm, field), field); \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +#define SLIST_SWAP(head1, head2, type) do { \ + struct type *swap_first = SLIST_FIRST(head1); \ + SLIST_FIRST(head1) = SLIST_FIRST(head2); \ + SLIST_FIRST(head2) = swap_first; \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. 
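+ *
+ * (Editor's note, not from the upstream header: stqh_last holds the address
+ * of the last element's stqe_next field, or of stqh_first while the queue is
+ * empty.  That is what lets STAILQ_INSERT_TAIL below append in O(1), and why
+ * STAILQ_LAST can recover the last element by subtracting the link offset:
+ *
+ *     (struct type *)((char *)((head)->stqh_last) -
+ *         __offsetof(struct type, field))
+ *
+ * i.e. stepping back from the embedded link to its enclosing structure.)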
+ */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? \ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.stqe_next); \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + STAILQ_REMOVE_AFTER(head, curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define STAILQ_REMOVE_AFTER(head, elm, field) do { \ + if ((STAILQ_NEXT(elm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_SWAP(head1, head2, type) do { \ + struct type *swap_first = STAILQ_FIRST(head1); \ + struct type **swap_last = (head1)->stqh_last; \ + STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_FIRST(head2) = swap_first; \ + (head2)->stqh_last = swap_last; \ + if (STAILQ_EMPTY(head1)) \ + (head1)->stqh_last = &STAILQ_FIRST(head1); \ + if (STAILQ_EMPTY(head2)) \ + (head2)->stqh_last = &STAILQ_FIRST(head2); \ +} while (0) + + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
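+ *
+ * (Editor's note, not from the upstream header: le_prev holds the address of
+ * the previous element's le_next pointer, or of lh_first for the first
+ * element.  That is why LIST_REMOVE below is O(1) and needs no head
+ * argument; its core statement
+ *
+ *     *(elm)->field.le_prev = LIST_NEXT((elm), field);
+ *
+ * rewrites whichever pointer was reaching elm, wherever that pointer lives.)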
+ */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + QMD_LIST_CHECK_NEXT(listelm, field); \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + QMD_LIST_CHECK_HEAD((head), field); \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.le_next); \ + QMD_SAVELINK(oldprev, (elm)->field.le_prev); \ + QMD_LIST_CHECK_NEXT(elm, field); \ + QMD_LIST_CHECK_PREV(elm, field); \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ +} while (0) + +#define LIST_SWAP(head1, head2, type, field) do { \ + struct type *swap_tmp = LIST_FIRST((head1)); \ + LIST_FIRST((head1)) = LIST_FIRST((head2)); \ + LIST_FIRST((head2)) = swap_tmp; \ + if ((swap_tmp = LIST_FIRST((head1))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head1)); \ + if ((swap_tmp = LIST_FIRST((head2))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head2)); \ +} while (0) + +/* + * Tail queue declarations. 
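+ *
+ * (Editor's note, not from the upstream header: TAILQ_LAST and TAILQ_PREV
+ * below exploit the fact that a head and an entry share the same
+ * two-pointer layout, so a stored tqe_prev/tqh_last value may be cast to
+ * struct headname *.  TAILQ_LAST(head, headname), for instance, expands to
+ *
+ *     *(((struct headname *)((head)->tqh_last))->tqh_last)
+ *
+ * treating the final link pair as if it were another list head.)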
+ */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + 
(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + QMD_TAILQ_CHECK_TAIL(head, field); \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_SWAP(head1, head2, type, field) do { \ + struct type *swap_first = (head1)->tqh_first; \ + struct type **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) + +#endif /* !_SYS_QUEUE_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery xen-4.9.2/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery --- xen-4.9.0/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,72 @@ +#!/usr/bin/perl -p +# +# This script is part of the Xen build system. It has a very +# permissive licence to avoid complicating the licence of the +# generated header file and to allow this seddery to be reused by +# other projects. 
+# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this individual file (the "Software"), to deal +# in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, +# sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the +# following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Copyright (C) 2011 Citrix Ltd + +our $namespace, $ucnamespace; + +BEGIN { + die unless @ARGV; + $namespace = pop @ARGV; + $namespace =~ s/^--prefix=// or die; + $ucnamespace = uc $namespace; + + print <tools/libxl/external/bsd-COPYRIGHT + +Exceptions: + +README + + This file + +bsd-sys-queue-h-seddery + + Script to transform the above into a new namespace. diff -Nru xen-4.9.0/extras/mini-os/include/mm.h xen-4.9.2/extras/mini-os/include/mm.h --- xen-4.9.0/extras/mini-os/include/mm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/mm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,90 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _MM_H_ +#define _MM_H_ + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include +#else +#error "Unsupported architecture" +#endif +#include + +#include +#include +#include + +#define STACK_SIZE_PAGE_ORDER __STACK_SIZE_PAGE_ORDER +#define STACK_SIZE __STACK_SIZE + +#define round_pgdown(_p) ((_p) & PAGE_MASK) +#define round_pgup(_p) (((_p) + (PAGE_SIZE - 1)) & PAGE_MASK) + +extern unsigned long nr_free_pages; + +extern unsigned long *mm_alloc_bitmap; +extern unsigned long mm_alloc_bitmap_size; + +void init_mm(void); +unsigned long alloc_pages(int order); +#define alloc_page() alloc_pages(0) +void free_pages(void *pointer, int order); +#define free_page(p) free_pages(p, 0) + +static __inline__ int get_order(unsigned long size) +{ + int order; + size = (size-1) >> PAGE_SHIFT; + for ( order = 0; size; order++ ) + size >>= 1; + return order; +} + +void arch_init_demand_mapping_area(void); +void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p); + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment); +/* map f[i*stride]+i*increment for i in 0..n-1, aligned on alignment pages */ +void *map_frames_ex(const unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, unsigned long alignment, domid_t id, + int *err, unsigned long prot); +int do_map_frames(unsigned long addr, + const unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, domid_t id, int *err, unsigned long prot); +int unmap_frames(unsigned long va, unsigned long num_frames); +int map_frame_rw(unsigned long addr, unsigned long mfn); +unsigned long map_frame_virt(unsigned long mfn); +#ifdef HAVE_LIBC +extern unsigned long heap, brk, heap_mapped, heap_end; +#endif + +int free_physical_pages(xen_pfn_t *mfns, int n); +void fini_mm(void); + +#endif /* _MM_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/netfront.h xen-4.9.2/extras/mini-os/include/netfront.h --- xen-4.9.0/extras/mini-os/include/netfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/netfront.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,24 @@ +#include +#ifdef HAVE_LWIP +#include +#endif +struct netfront_dev; +struct netfront_dev *init_netfront(char *nodename, void (*netif_rx)(unsigned char *data, int len), unsigned char rawmac[6], char **ip); +void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len); +void shutdown_netfront(struct netfront_dev *dev); +#ifdef HAVE_LIBC +int netfront_tap_open(char *nodename); +ssize_t netfront_receive(struct netfront_dev *dev, unsigned char *data, size_t len); +#endif + +extern struct wait_queue_head netfront_queue; + +#ifdef HAVE_LWIP +/* Call this to bring up the netfront interface and the lwIP stack. + * N.B. _must_ be called from a thread; it's not safe to call this from + * app_main(). 
*/ +void start_networking(void); +void stop_networking(void); + +void networking_set_addr(struct ip_addr *ipaddr, struct ip_addr *netmask, struct ip_addr *gw); +#endif diff -Nru xen-4.9.0/extras/mini-os/include/paravirt.h xen-4.9.2/extras/mini-os/include/paravirt.h --- xen-4.9.0/extras/mini-os/include/paravirt.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/paravirt.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,81 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _PARAVIRT_H +#define _PARAVIRT_H + +#if defined(CONFIG_PARAVIRT) + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) + +/* for P2M */ +#ifdef __x86_64__ +#define P2M_SHIFT 9 +#else +#define P2M_SHIFT 10 +#endif +#define P2M_ENTRIES (1UL << P2M_SHIFT) +#define P2M_MASK (P2M_ENTRIES - 1) +#define L1_P2M_SHIFT P2M_SHIFT +#define L2_P2M_SHIFT (2 * P2M_SHIFT) +#define L3_P2M_SHIFT (3 * P2M_SHIFT) +#define L1_P2M_IDX(pfn) ((pfn) & P2M_MASK) +#define L2_P2M_IDX(pfn) (((pfn) >> L1_P2M_SHIFT) & P2M_MASK) +#define L3_P2M_IDX(pfn) (((pfn) >> L2_P2M_SHIFT) & P2M_MASK) +#define INVALID_P2M_ENTRY (~0UL) + +void p2m_chk_pfn(unsigned long pfn); + +static inline unsigned long p2m_pages(unsigned long pages) +{ + return (pages + P2M_ENTRIES - 1) >> L1_P2M_SHIFT; +} + +void arch_init_p2m(unsigned long max_pfn_p); + +#else + +#define mfn_to_pfn(_mfn) ((unsigned long)(_mfn)) +#define pfn_to_mfn(_pfn) ((unsigned long)(_pfn)) + +static inline void arch_init_p2m(unsigned long max_pfn_p) { } + +#endif + +#if defined(CONFIG_PARAVIRT) && defined(CONFIG_BALLOON) + +void arch_remap_p2m(unsigned long max_pfn); +int arch_expand_p2m(unsigned long max_pfn); + +#else + +static inline void arch_remap_p2m(unsigned long max_pfn) { } +static inline int arch_expand_p2m(unsigned long max_pfn) +{ + return 0; +} + +#endif + +#endif /* _PARAVIRT_H */ diff -Nru xen-4.9.0/extras/mini-os/include/pcifront.h xen-4.9.2/extras/mini-os/include/pcifront.h --- xen-4.9.0/extras/mini-os/include/pcifront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/pcifront.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,29 @@ +#include +#include +struct pcifront_dev; +void pcifront_watches(void *opaque); +struct pcifront_dev *init_pcifront(char *nodename); +void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op); +void 
pcifront_scan(struct pcifront_dev *dev, void (*fun)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun)); +int pcifront_conf_read(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int *val); +int pcifront_conf_write(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int val); +int pcifront_enable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +int pcifront_disable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +int pcifront_enable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + struct xen_msix_entry *entries, int n); +int pcifront_disable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +void shutdown_pcifront(struct pcifront_dev *dev); diff -Nru xen-4.9.0/extras/mini-os/include/posix/arpa/inet.h xen-4.9.2/extras/mini-os/include/posix/arpa/inet.h --- xen-4.9.0/extras/mini-os/include/posix/arpa/inet.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/arpa/inet.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_ARPA_INET_H_ +#define _POSIX_ARPA_INET_H_ + +#include + +#endif /* _POSIX_ARPA_INET_H_ */ + diff -Nru xen-4.9.0/extras/mini-os/include/posix/dirent.h xen-4.9.2/extras/mini-os/include/posix/dirent.h --- xen-4.9.0/extras/mini-os/include/posix/dirent.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/dirent.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,24 @@ +#ifndef _POSIX_DIRENT_H +#define _POSIX_DIRENT_H + +#include + +struct dirent { + char *d_name; +}; + +typedef struct { + struct dirent dirent; + char *name; + int32_t offset; + char **entries; + int32_t curentry; + int32_t nbentries; + int has_more; +} DIR; + +DIR *opendir(const char *name); +struct dirent *readdir(DIR *dir); +int closedir(DIR *dir); + +#endif /* _POSIX_DIRENT_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/err.h xen-4.9.2/extras/mini-os/include/posix/err.h --- xen-4.9.0/extras/mini-os/include/posix/err.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/err.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,15 @@ +#ifndef _POSIX_ERR_H +#define _POSIX_ERR_H + +#include + +void err(int eval, const char *fmt, ...); +void errx(int eval, const char *fmt, ...); +void warn(const char *fmt, ...); +void warnx(const char *fmt, ...); +void verr(int eval, const char *fmt, va_list args); +void verrx(int eval, const char *fmt, va_list args); +void vwarn(const char *fmt, va_list args); +void vwarnx(const char *fmt, va_list args); + +#endif /* _POSIX_ERR_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/fcntl.h xen-4.9.2/extras/mini-os/include/posix/fcntl.h --- xen-4.9.0/extras/mini-os/include/posix/fcntl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/fcntl.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef _POSIX_FCNTL_H +#define _POSIX_FCNTL_H + +#include_next + +#define F_ULOCK 0 +#define F_LOCK 1 +#define F_TLOCK 2 +#define F_TEST 3 + +#endif /* _POSIX_FCNTL_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/limits.h xen-4.9.2/extras/mini-os/include/posix/limits.h --- 
xen-4.9.0/extras/mini-os/include/posix/limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/limits.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,48 @@ +#ifndef _POSIX_LIMITS_H +#define _POSIX_LIMITS_H + +#include + +#define CHAR_BIT 8 + +#define SCHAR_MAX 0x7f +#define SCHAR_MIN (-SCHAR_MAX-1) +#define UCHAR_MAX 0xff + +#ifdef __CHAR_UNSIGNED__ +# define CHAR_MIN 0 +# define CHAR_MAX UCHAR_MAX +#else +# define CHAR_MIN SCHAR_MIN +# define CHAR_MAX SCHAR_MAX +#endif + +#define INT_MAX 0x7fffffff +#define INT_MIN (-INT_MAX-1) +#define UINT_MAX 0xffffffff + +#define SHRT_MIN (-0x8000) +#define SHRT_MAX 0x7fff +#define USHRT_MAX 0xffff + +#if defined(__x86_64__) +# define LONG_MAX 0x7fffffffffffffffL +# define ULONG_MAX 0xffffffffffffffffUL +#else +# define LONG_MAX 0x7fffffffL +# define ULONG_MAX 0xffffffffUL +#endif +#define LONG_MIN (-LONG_MAX-1L) + +#define LLONG_MAX 0x7fffffffffffffffLL +#define LLONG_MIN (-LLONG_MAX-1LL) +#define ULLONG_MAX 0xffffffffffffffffULL + +#define LONG_LONG_MIN LLONG_MIN +#define LONG_LONG_MAX LLONG_MAX +#define ULONG_LONG_MAX ULLONG_MAX + +#define PATH_MAX __PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE + +#endif /* _POSIX_LIMITS_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/net/if.h xen-4.9.2/extras/mini-os/include/posix/net/if.h --- xen-4.9.0/extras/mini-os/include/posix/net/if.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/net/if.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,85 @@ +/* + * This code is mostly taken from NetBSD net/if.h + * Changes: Stefano Stabellini + * + ****************************************************************************** + * + * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by William Studenmund and Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _NET_IF_H_ +#define _NET_IF_H_ + +/* + * Length of interface external name, including terminating '\0'. + * Note: this is the same size as a generic device's external name. + */ +#define IF_NAMESIZE 16 + +struct if_nameindex { + unsigned int if_index; /* 1, 2, ... */ + char *if_name; /* null terminated name: "le0", ... */ +}; + +unsigned int if_nametoindex(const char *); +char * if_indextoname(unsigned int, char *); +struct if_nameindex * if_nameindex(void); +void if_freenameindex(struct if_nameindex *); + +#endif /* !_NET_IF_H_ */ + diff -Nru xen-4.9.0/extras/mini-os/include/posix/netdb.h xen-4.9.2/extras/mini-os/include/posix/netdb.h --- xen-4.9.0/extras/mini-os/include/posix/netdb.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/netdb.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,9 @@ +#ifndef _POSIX_NETDB_H_ +#define _POSIX_NETDB_H_ + +struct hostent { + char *h_addr; +}; +#define gethostbyname(buf) NULL + +#endif /* _POSIX_NETDB_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/netinet/in.h xen-4.9.2/extras/mini-os/include/posix/netinet/in.h --- xen-4.9.0/extras/mini-os/include/posix/netinet/in.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/netinet/in.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SYS_IN_H_ +#define _POSIX_SYS_IN_H_ + +#include +#include + +#endif /* _POSIX_SYS_IN_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/netinet/tcp.h xen-4.9.2/extras/mini-os/include/posix/netinet/tcp.h --- xen-4.9.0/extras/mini-os/include/posix/netinet/tcp.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/netinet/tcp.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,6 @@ +#ifndef _POSIX_SYS_TCP_H_ +#define _POSIX_SYS_TCP_H_ + +#include + +#endif /* _POSIX_SYS_TCP_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/poll.h xen-4.9.2/extras/mini-os/include/posix/poll.h --- xen-4.9.0/extras/mini-os/include/posix/poll.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/poll.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1 @@ +#include diff -Nru xen-4.9.0/extras/mini-os/include/posix/pthread.h 
xen-4.9.2/extras/mini-os/include/posix/pthread.h --- xen-4.9.0/extras/mini-os/include/posix/pthread.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/pthread.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,64 @@ +#ifndef _POSIX_PTHREAD_H +#define _POSIX_PTHREAD_H + +#include + +/* Let's be single-threaded for now. */ + +typedef struct { + void *ptr; +} *pthread_key_t; +static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) +{ + *key = malloc(sizeof(**key)); + (*key)->ptr = NULL; + return 0; +} +static inline int pthread_setspecific(pthread_key_t key, const void *pointer) +{ + key->ptr = (void*) pointer; + return 0; +} +static inline void *pthread_getspecific(pthread_key_t key) +{ + return key->ptr; +} +static inline int pthread_key_delete(pthread_key_t key) +{ + free(key); + return 0; +} + + + +typedef struct {} pthread_mutexattr_t; +static inline int pthread_mutexattr_init(pthread_mutexattr_t *mattr) { return 0; } +#define PTHREAD_MUTEX_NORMAL 0 +#define PTHREAD_MUTEX_RECURSIVE 1 +static inline int pthread_mutexattr_settype(pthread_mutexattr_t *mattr, int kind) { return 0; } +static inline int pthread_mutexattr_destroy(pthread_mutexattr_t *mattr) { return 0; } +typedef struct {} pthread_mutex_t; +#define PTHREAD_MUTEX_INITIALIZER {} +static inline int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *mattr) { return 0; } +static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; } +static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; } + + + +typedef struct { + int done; +} pthread_once_t; +#define PTHREAD_ONCE_INIT { 0 } + +static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) +{ + if (!once_control->done) { + once_control->done = 1; + init_routine(); + } + return 0; +} + +#define __thread + +#endif /* _POSIX_PTHREAD_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/signal.h xen-4.9.2/extras/mini-os/include/posix/signal.h --- xen-4.9.0/extras/mini-os/include/posix/signal.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/signal.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,10 @@ +#ifndef _POSIX_SIGNAL_H +#define _POSIX_SIGNAL_H + +#include_next + +int sigaction(int signum, const struct sigaction * __restrict, + struct sigaction * __restrict); + +#endif + diff -Nru xen-4.9.0/extras/mini-os/include/posix/stdlib.h xen-4.9.2/extras/mini-os/include/posix/stdlib.h --- xen-4.9.0/extras/mini-os/include/posix/stdlib.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/stdlib.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,8 @@ +#ifndef _POSIX_STDLIB_H +#define _POSIX_STDLIB_H + +#include_next + +#define realpath(p,r) strcpy(r,p) + +#endif /* _POSIX_STDLIB_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/strings.h xen-4.9.2/extras/mini-os/include/posix/strings.h --- xen-4.9.0/extras/mini-os/include/posix/strings.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/strings.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,12 @@ +#ifndef _POSIX_STRINGS_H +#define _POSIX_STRINGS_H + +#include + +#define bzero(ptr, size) (memset((ptr), '\0', (size)), (void) 0) + +int ffs (int i); +int ffsl (long int li); +int ffsll (long long int lli); + +#endif /* _POSIX_STRINGS_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/ioctl.h xen-4.9.2/extras/mini-os/include/posix/sys/ioctl.h --- xen-4.9.0/extras/mini-os/include/posix/sys/ioctl.h 1970-01-01 
00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/ioctl.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef _POSIX_SYS_IOCTL_H +#define _POSIX_SYS_IOCTL_H + +int ioctl(int fd, int request, ...); + +#define _IOC_NONE 0 +#define _IOC_WRITE 1 +#define _IOC_READ 2 + +#define _IOC(rw, class, n, size) \ + (((rw ) << 30) | \ + ((class) << 22) | \ + ((n ) << 14) | \ + ((size ) << 0)) + +#endif /* _POSIX_SYS_IOCTL_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/mman.h xen-4.9.2/extras/mini-os/include/posix/sys/mman.h --- xen-4.9.0/extras/mini-os/include/posix/sys/mman.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/mman.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,22 @@ +#ifndef _POSIX_SYS_MMAN_H +#define _POSIX_SYS_MMAN_H + +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define PROT_EXEC 0x4 + +#define MAP_SHARED 0x01 +#define MAP_PRIVATE 0x02 +#define MAP_ANON 0x20 + +/* Pages are always resident anyway */ +#define MAP_LOCKED 0x0 + +#define MAP_FAILED ((void*)0) + +void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) asm("mmap64"); +int munmap(void *start, size_t length); +static inline mlock(const void *addr, size_t len) { return 0; } +static inline munlock(const void *addr, size_t len) { return 0; } + +#endif /* _POSIX_SYS_MMAN_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/poll.h xen-4.9.2/extras/mini-os/include/posix/sys/poll.h --- xen-4.9.0/extras/mini-os/include/posix/sys/poll.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/poll.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,79 @@ +/* + * This code is mostly taken from FreeBSD sys/sys/poll.h + * Changes: Stefano Stabellini + * + **************************************************************************** + * Copyright (c) 1997 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _POSIX_SYS_POLL_H_ +#define _POSIX_SYS_POLL_H_ + +/* + * This file is intended to be compatible with the traditional poll.h. 
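The _IOC() helper defined in posix/sys/ioctl.h above packs the transfer direction, device class, command number and argument size into a single request word (2, 8, 8 and 14 bits, from the top bit down). A small standalone sketch of that packing, with the macro copied from the header; the 'T'/command-3 request is a made-up example, not anything the patch defines:

    #include <stdio.h>

    #define _IOC_NONE  0
    #define _IOC_WRITE 1
    #define _IOC_READ  2

    /* Copied from the posix/sys/ioctl.h hunk above. */
    #define _IOC(rw, class, n, size) \
        (((rw   ) << 30) | \
         ((class) << 22) | \
         ((n    ) << 14) | \
         ((size ) << 0))

    int main(void)
    {
        /* Hypothetical "write 16 bytes to device class 'T', command 3". */
        unsigned int req = _IOC(_IOC_WRITE, 'T', 3, 16);

        printf("request = %#010x\n", req);            /* 0x5500c010 */
        printf("rw    = %u\n",   (req >> 30) & 0x3);  /* 1 == _IOC_WRITE */
        printf("class = '%c'\n", (req >> 22) & 0xff); /* 'T' */
        printf("n     = %u\n",   (req >> 14) & 0xff); /* 3 */
        printf("size  = %u\n",   req & 0x3fff);       /* 16 */
        return 0;
    }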
+ */ + +typedef unsigned int nfds_t; + +/* + * This structure is passed as an array to poll(2). + */ +struct pollfd { + int fd; /* which file descriptor to poll */ + short events; /* events we are interested in */ + short revents; /* events found on return */ +}; + +/* + * Requestable events. If poll(2) finds any of these set, they are + * copied to revents on return. + * XXX Note that FreeBSD doesn't make much distinction between POLLPRI + * and POLLRDBAND since none of the file types have distinct priority + * bands - and only some have an urgent "mode". + * XXX Note POLLIN isn't really supported in true SVSV terms. Under SYSV + * POLLIN includes all of normal, band and urgent data. Most poll handlers + * on FreeBSD only treat it as "normal" data. + */ +#define POLLIN 0x0001 /* any readable data available */ +#define POLLPRI 0x0002 /* OOB/Urgent readable data */ +#define POLLOUT 0x0004 /* file descriptor is writeable */ +#define POLLRDNORM 0x0040 /* non-OOB/URG data available */ +#define POLLWRNORM POLLOUT /* no write type differentiation */ +#define POLLRDBAND 0x0080 /* OOB/Urgent readable data */ +#define POLLWRBAND 0x0100 /* OOB/Urgent data can be written */ + +/* + * These events are set if they occur regardless of whether they were + * requested. + */ +#define POLLERR 0x0008 /* some poll error occurred */ +#define POLLHUP 0x0010 /* file descriptor was "hung up" */ +#define POLLNVAL 0x0020 /* requested events "invalid" */ + +int poll(struct pollfd _pfd[], nfds_t _nfds, int _timeout); + +#endif /* _POSIX_SYS_POLL_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/select.h xen-4.9.2/extras/mini-os/include/posix/sys/select.h --- xen-4.9.0/extras/mini-os/include/posix/sys/select.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/select.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SELECT_H +#define _POSIX_SELECT_H + +#include +int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); + +#endif /* _POSIX_SELECT_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/socket.h xen-4.9.2/extras/mini-os/include/posix/sys/socket.h --- xen-4.9.0/extras/mini-os/include/posix/sys/socket.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/socket.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,31 @@ +#ifndef _POSIX_SYS_SOCKET_H_ +#define _POSIX_SYS_SOCKET_H_ + +#include +#include + +int accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int bind(int s, struct sockaddr *name, socklen_t namelen); +int shutdown(int s, int how); +int getpeername (int s, struct sockaddr *name, socklen_t *namelen); +int getsockname (int s, struct sockaddr *name, socklen_t *namelen); +int getsockopt (int s, int level, int optname, void *optval, socklen_t *optlen); +int setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen); +int close(int s); +int connect(int s, struct sockaddr *name, socklen_t namelen); +int listen(int s, int backlog); +int recv(int s, void *mem, int len, unsigned int flags); +//int read(int s, void *mem, int len); +int recvfrom(int s, void *mem, int len, unsigned int flags, + struct sockaddr *from, socklen_t *fromlen); +int send(int s, void *dataptr, int size, unsigned int flags); +int sendto(int s, void *dataptr, int size, unsigned int flags, + struct sockaddr *to, socklen_t tolen); +int socket(int domain, int type, int protocol); +//int write(int s, void *dataptr, int size); +int select(int maxfdp1, fd_set *readset, fd_set 
*writeset, fd_set *exceptset, + struct timeval *timeout); +//int ioctl(int s, long cmd, void *argp); +int getsockname(int s, struct sockaddr *name, socklen_t *namelen); + +#endif /* _POSIX_SYS_SOCKET_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/sys/stat.h xen-4.9.2/extras/mini-os/include/posix/sys/stat.h --- xen-4.9.0/extras/mini-os/include/posix/sys/stat.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/sys/stat.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SYS_STAT_H +#define _POSIX_SYS_STAT_H + +#include_next +int fstat(int fd, struct stat *buf) asm("fstat64"); + +#endif /* _POSIX_SYS_STAT_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/syslog.h xen-4.9.2/extras/mini-os/include/posix/syslog.h --- xen-4.9.0/extras/mini-os/include/posix/syslog.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/syslog.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,37 @@ +#ifndef _POSIX_SYSLOG_H +#define _POSIX_SYSLOG_H + +#include + +#define LOG_PID 0 +#define LOG_CONS 0 +#define LOG_NDELAY 0 +#define LOG_ODELAY 0 +#define LOG_NOWAIT 0 + +#define LOG_KERN 0 +#define LOG_USER 0 +#define LOG_MAIL 0 +#define LOG_NEWS 0 +#define LOG_UUCP 0 +#define LOG_DAEMON 0 +#define LOG_AUTH 0 +#define LOG_CRON 0 +#define LOG_LPR 0 + +/* TODO: support */ +#define LOG_EMERG 0 +#define LOG_ALERT 1 +#define LOG_CRIT 2 +#define LOG_ERR 3 +#define LOG_WARNING 4 +#define LOG_NOTICE 5 +#define LOG_INFO 6 +#define LOG_DEBUG 7 + +void openlog(const char *ident, int option, int facility); +void syslog(int priority, const char *format, ...); +void closelog(void); +void vsyslog(int priority, const char *format, va_list ap); + +#endif /* _POSIX_SYSLOG_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/termios.h xen-4.9.2/extras/mini-os/include/posix/termios.h --- xen-4.9.0/extras/mini-os/include/posix/termios.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/termios.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,87 @@ +#ifndef _POSIX_TERMIOS_H +#define _POSIX_TERMIOS_H + +#define NCC 32 + +struct termios { + unsigned long c_iflag; + unsigned long c_oflag; + unsigned long c_lflag; + unsigned long c_cflag; + unsigned char c_cc[NCC]; +}; + +/* modem lines */ +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RI 0x080 +#define TIOCM_DSR 0x100 + +/* c_iflag */ +#define IGNBRK 0x00000001 +#define BRKINT 0x00000002 +#define IGNPAR 0x00000004 +#define PARMRK 0x00000008 +#define INPCK 0x00000010 +#define ISTRIP 0x00000020 +#define INLCR 0x00000040 +#define IGNCR 0x00000080 +#define ICRNL 0x00000100 +#define IUCLC 0x00000200 +#define IXON 0x00000400 +#define IXANY 0x00000800 +#define IXOFF 0x00001000 +#define IMAXBEL 0x00002000 +#define IUTF8 0x00004000 + +/* c_oflag */ +#define OPOST 0x00000001 +#define OLCUC 0x00000002 +#define ONLCR 0x00000004 +#define OCRNL 0x00000008 +#define ONOCR 0x00000010 +#define ONLRET 0x00000020 +#define OFILL 0x00000040 +#define OFDEL 0x00000080 + +/* c_lflag */ +#define ISIG 0x00000001 +#define ICANON 0x00000002 +#define XCASE 0x00000004 +#define ECHO 0x00000008 +#define ECHOE 0x00000010 +#define ECHOK 0x00000020 +#define ECHONL 0x00000040 +#define NOFLSH 0x00000080 +#define TOSTOP 0x00000100 +#define ECHOCTL 0x00000200 +#define ECHOPRT 0x00000400 +#define ECHOKE 0x00000800 +#define FLUSHO 0x00002000 +#define PENDIN 0x00004000 +#define IEXTEN 0x00008000 + +/* c_cflag */ +#define CSIZE 0x00000030 +#define CS8 
0x00000030 +#define CSTOPB 0x00000040 +#define CREAD 0x00000080 +#define PARENB 0x00000100 +#define PARODD 0x00000200 +#define HUPCL 0x00000400 +#define CLOCAL 0x00000800 + +/* c_cc */ +#define VTIME 5 +#define VMIN 6 + +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +int tcsetattr(int fildes, int action, const struct termios *tios); +int tcgetattr(int fildes, struct termios *tios); + +#endif /* _POSIX_TERMIOS_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/time.h xen-4.9.2/extras/mini-os/include/posix/time.h --- xen-4.9.0/extras/mini-os/include/posix/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/time.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef _POSIX_TIME_H +#define _POSIX_TIME_H + +#include +#define CLOCK_MONOTONIC 2 +#include_next + +int nanosleep(const struct timespec *req, struct timespec *rem); +int clock_gettime(clockid_t clock_id, struct timespec *tp); + +#endif /* _POSIX_TIME_H */ diff -Nru xen-4.9.0/extras/mini-os/include/posix/unistd.h xen-4.9.2/extras/mini-os/include/posix/unistd.h --- xen-4.9.0/extras/mini-os/include/posix/unistd.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/posix/unistd.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef _POSIX_UNISTD_H +#define _POSIX_UNISTD_H + +#include_next + +uid_t getuid(void); +uid_t geteuid(void); +gid_t getgid(void); +gid_t getegid(void); +int gethostname(char *name, size_t namelen); +size_t getpagesize(void); +int ftruncate(int fd, off_t length); +int lockf(int fd, int cmd, off_t len); +int nice(int inc); + +#endif /* _POSIX_UNISTD_H */ diff -Nru xen-4.9.0/extras/mini-os/include/sched.h xen-4.9.2/extras/mini-os/include/sched.h --- xen-4.9.0/extras/mini-os/include/sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/sched.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,55 @@ +#ifndef __SCHED_H__ +#define __SCHED_H__ + +#include +#include +#include +#ifdef HAVE_LIBC +#include +#endif + +struct thread +{ + char *name; + char *stack; + /* keep in that order */ + unsigned long sp; /* Stack pointer */ + unsigned long ip; /* Instruction pointer */ + MINIOS_TAILQ_ENTRY(struct thread) thread_list; + uint32_t flags; + s_time_t wakeup_time; +#ifdef HAVE_LIBC + struct _reent reent; +#endif +}; + +extern struct thread *idle_thread; +void idle_thread_fn(void *unused); + +#define RUNNABLE_FLAG 0x00000001 + +#define is_runnable(_thread) (_thread->flags & RUNNABLE_FLAG) +#define set_runnable(_thread) (_thread->flags |= RUNNABLE_FLAG) +#define clear_runnable(_thread) (_thread->flags &= ~RUNNABLE_FLAG) + +#define switch_threads(prev, next) arch_switch_threads(prev, next) + + /* Architecture specific setup of thread creation. 
*/ +struct thread* arch_create_thread(char *name, void (*function)(void *), + void *data); + +void init_sched(void); +void run_idle_thread(void); +struct thread* create_thread(char *name, void (*function)(void *), void *data); +void exit_thread(void) __attribute__((noreturn)); +void schedule(void); + +#ifdef __INSIDE_MINIOS__ +#define current get_current() +#endif + +void wake(struct thread *thread); +void block(struct thread *thread); +void msleep(uint32_t millisecs); + +#endif /* __SCHED_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/semaphore.h xen-4.9.2/extras/mini-os/include/semaphore.h --- xen-4.9.0/extras/mini-os/include/semaphore.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/semaphore.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,110 @@ +#ifndef _SEMAPHORE_H_ +#define _SEMAPHORE_H_ + +#include +#include + +/* + * Implementation of semaphore in Mini-os is simple, because + * there are no preemptive threads, the atomicity is guaranteed. + */ + +struct semaphore +{ + int count; + struct wait_queue_head wait; +}; + +/* + * the semaphore definition + */ +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + int debug; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = n, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) + +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static inline void init_SEMAPHORE(struct semaphore *sem, int count) +{ + sem->count = count; + init_waitqueue_head(&sem->wait); +} + +#define init_MUTEX(sem) init_SEMAPHORE(sem, 1) + +static inline int trydown(struct semaphore *sem) +{ + unsigned long flags; + int ret = 0; + local_irq_save(flags); + if (sem->count > 0) { + ret = 1; + sem->count--; + } + local_irq_restore(flags); + return ret; +} + +static void inline down(struct semaphore *sem) +{ + unsigned long flags; + while (1) { + wait_event(sem->wait, sem->count > 0); + local_irq_save(flags); + if (sem->count > 0) + break; + local_irq_restore(flags); + } + sem->count--; + local_irq_restore(flags); +} + +static void inline up(struct semaphore *sem) +{ + unsigned long flags; + local_irq_save(flags); + sem->count++; + wake_up(&sem->wait); + local_irq_restore(flags); +} + +/* FIXME! Thre read/write semaphores are unimplemented! 
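The counting semaphore above leans on mini-os being non-preemptive: down() can only sleep inside wait_event(), so guarding the count with local_irq_save() is enough for atomicity. A minimal usage sketch against these declarations; it assumes a mini-os build tree, and the include path and the two functions are illustrative assumptions, not part of the patch:

    #include <mini-os/semaphore.h>       /* include path assumed */

    static DECLARE_MUTEX(config_lock);   /* binary semaphore, count = 1 */
    static int config_generation;

    /* Called from cooperating mini-os threads. */
    void bump_generation(void)
    {
        down(&config_lock);     /* may sleep in wait_event() */
        config_generation++;    /* protected section */
        up(&config_lock);       /* bumps the count and wakes waiters */
    }

    /* Non-blocking variant for contexts that must not sleep. */
    int try_bump_generation(void)
    {
        if (!trydown(&config_lock))
            return 0;           /* contended; caller retries later */
        config_generation++;
        up(&config_lock);
        return 1;
    }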
*/ +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = 1; +} + +static inline void down_read(struct rw_semaphore *sem) +{ +} + + +static inline void up_read(struct rw_semaphore *sem) +{ +} + +static inline void up_write(struct rw_semaphore *sem) +{ +} + +static inline void down_write(struct rw_semaphore *sem) +{ +} + +#endif /* _SEMAPHORE_H */ diff -Nru xen-4.9.0/extras/mini-os/include/spinlock.h xen-4.9.2/extras/mini-os/include/spinlock.h --- xen-4.9.0/extras/mini-os/include/spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/spinlock.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,55 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int slock; +} spinlock_t; + + +#include + + +#define SPINLOCK_MAGIC 0xdead4ead + +#define SPIN_LOCK_UNLOCKED ARCH_SPIN_LOCK_UNLOCKED + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define spin_is_locked(x) arch_spin_is_locked(x) + +#define spin_unlock_wait(x) arch_spin_unlock_wait(x) + + +#define _spin_trylock(lock) ({_raw_spin_trylock(lock) ? \ + 1 : ({ 0;});}) + +#define _spin_lock(lock) \ +do { \ + _raw_spin_lock(lock); \ +} while(0) + +#define _spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ +} while (0) + + +#define spin_lock(lock) _spin_lock(lock) +#define spin_unlock(lock) _spin_unlock(lock) + +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/sys/lock.h xen-4.9.2/extras/mini-os/include/sys/lock.h --- xen-4.9.0/extras/mini-os/include/sys/lock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/sys/lock.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,52 @@ +#ifndef _MINIOS_SYS_LOCK_H_ +#define _MINIOS_SYS_LOCK_H_ + +#ifdef HAVE_LIBC + +/* Due to inclusion loop, we can not include sched.h, so have to hide things */ + +#include + + +typedef struct { + int busy; + struct wait_queue_head wait; +} _LOCK_T; + +#define __LOCK_INIT(class,lock) \ + class _LOCK_T lock = { .wait = __WAIT_QUEUE_HEAD_INITIALIZER(lock.wait) } +int ___lock_init(_LOCK_T *lock); +int ___lock_acquire(_LOCK_T *lock); +int ___lock_try_acquire(_LOCK_T *lock); +int ___lock_release(_LOCK_T *lock); +int ___lock_close(_LOCK_T *lock); +#define __lock_init(__lock) ___lock_init(&__lock) +#define __lock_acquire(__lock) ___lock_acquire(&__lock) +#define __lock_release(__lock) ___lock_release(&__lock) +#define __lock_try_acquire(__lock) ___lock_try_acquire(&__lock) +#define __lock_close(__lock) 0 + + +typedef struct { + struct thread *owner; + int count; + struct wait_queue_head wait; +} _LOCK_RECURSIVE_T; + +#define __LOCK_INIT_RECURSIVE(class, lock) \ + class _LOCK_RECURSIVE_T lock = { .wait = __WAIT_QUEUE_HEAD_INITIALIZER((lock).wait) } + +int ___lock_init_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_acquire_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_try_acquire_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_release_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_close_recursive(_LOCK_RECURSIVE_T *lock); +#define __lock_init_recursive(__lock) ___lock_init_recursive(&__lock) +#define __lock_acquire_recursive(__lock) ___lock_acquire_recursive(&__lock) +#define __lock_release_recursive(__lock) 
___lock_release_recursive(&__lock) +#define __lock_try_acquire_recursive(__lock) ___lock_try_acquire_recursive(&__lock) +#define __lock_close_recursive(__lock) 0 + +#endif + +#endif /* _MINIOS_SYS_LOCK_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/sys/time.h xen-4.9.2/extras/mini-os/include/sys/time.h --- xen-4.9.0/extras/mini-os/include/sys/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/sys/time.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,47 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Robert Kaiser (kaiser@informatik.fh-wiesbaden.de) + * + * Date: Jul 2003, changes: Jun 2005, Sep 2006 + * + * Environment: Xen Minimal OS + * Description: Time and timer functions + * + **************************************************************************** + */ + +#ifndef _MINIOS_SYS_TIME_H_ +#define _MINIOS_SYS_TIME_H_ + +#ifdef HAVE_LIBC +#include_next + +#else +struct timespec { + time_t tv_sec; + long tv_nsec; +}; + +struct timezone { +}; + +struct timeval { + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ +}; + +int gettimeofday(struct timeval *tv, void *tz); + +#endif +#ifdef HAVE_LIBC +#include +#endif + +#endif /* _MINIOS_SYS_TIME_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/time.h xen-4.9.2/extras/mini-os/include/time.h --- xen-4.9.0/extras/mini-os/include/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/time.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,63 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Robert Kaiser (kaiser@informatik.fh-wiesbaden.de) + * + * Date: Jul 2003, changes: Jun 2005, Sep 2006 + * + * Environment: Xen Minimal OS + * Description: Time and timer functions + * + **************************************************************************** + */ + +#ifndef _MINIOS_TIME_H_ +#define _MINIOS_TIME_H_ +#include + +/* + * System Time + * 64 bit value containing the nanoseconds elapsed since boot time. + * This value is adjusted by frequency drift. + * NOW() returns the current time. 
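To make the NOW()/MILLISECS() helpers this comment introduces (they are defined just below) concrete, here is the usual deadline pattern as a minimal sketch; it assumes a mini-os build tree and an assumed include path:

    #include <mini-os/time.h>   /* include path assumed */

    void pause_briefly(void)
    {
        /* Block this domain for up to 50 ms of system time. */
        s_time_t deadline = NOW() + MILLISECS(50);
        block_domain(deadline);
    }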
+ * The other macros are for convenience to approximate short intervals + * of real time into system time + */ +typedef int64_t s_time_t; +#define NOW() ((s_time_t)monotonic_clock()) +#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) +#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) +#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) +#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL ) +#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) +#define Time_Max ((s_time_t) 0x7fffffffffffffffLL) +#define FOREVER Time_Max +#define NSEC_TO_USEC(_nsec) ((_nsec) / 1000UL) +#define NSEC_TO_MSEC(_nsec) ((_nsec) / 1000000ULL) +#define NSEC_TO_SEC(_nsec) ((_nsec) / 1000000000ULL) + +/* wall clock time */ +typedef long time_t; +typedef long suseconds_t; + +#include + +#ifdef HAVE_LIBC +#include_next +#endif + +/* prototypes */ +void init_time(void); +void fini_time(void); +s_time_t get_s_time(void); +s_time_t get_v_time(void); +uint64_t monotonic_clock(void); +void block_domain(s_time_t until); + +#endif /* _MINIOS_TIME_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/tpmback.h xen-4.9.2/extras/mini-os/include/tpmback.h --- xen-4.9.0/extras/mini-os/include/tpmback.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/tpmback.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/xen/tpmback/tpmback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netback/netback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This code has also been derived from drivers/xen/tpmback/xenbus.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2005 IBM Corporation + * Copyright (C) 2005 Rusty Russell + * + * This code has also been derived from drivers/xen/tpmback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself also derived from drvivers/xen/netback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ + +#include +#include +#include +#include +#ifndef TPMBACK_H +#define TPMBACK_H + +struct tpmcmd { + domid_t domid; /* Domid of the frontend */ + uint8_t locality; /* Locality requested by the frontend */ + unsigned int handle; /* Handle of the frontend */ + void *opaque; /* Opaque pointer taken from the tpmback instance */ + + uint8_t* req; /* tpm command bits, allocated by driver, DON'T FREE IT */ + unsigned int req_len; /* Size of the command in buf - set by tpmback driver */ + unsigned int resp_len; /* Size of the outgoing command, + you set this before passing the cmd object to tpmback_resp */ + uint8_t* resp; /* Buffer for response - YOU MUST ALLOCATE IT, YOU MUST ALSO FREE IT */ +}; +typedef struct tpmcmd tpmcmd_t; + +/* Initialize the tpm backend driver */ +void init_tpmback(void (*open_cb)(domid_t, unsigned int), void (*close_cb)(domid_t, unsigned int)); + +/* Shutdown tpm backend driver */ +void shutdown_tpmback(void); + +/* Blocks until a tpm command is sent from any front end. + * Returns a pointer to the tpm command to handle. 
+ * Do not try to free this pointer or the req buffer + * This function will return NULL if the tpm backend driver + * is shutdown or any other error occurs */ +tpmcmd_t* tpmback_req_any(void); + +/* Blocks until a tpm command from the frontend at domid/handle + * is sent. + * Returns NULL if domid/handle is not connected, tpmback is + * shutdown or shutting down, or if there is an error + */ +tpmcmd_t* tpmback_req(domid_t domid, unsigned int handle); + +/* Send the response to the tpm command back to the frontend + * This function will free the tpmcmd object, but you must free the resp + * buffer yourself */ +void tpmback_resp(tpmcmd_t* tpmcmd); + +/* Waits for the first frontend to connect and then sets domid and handle appropriately. + * If one or more frontends are already connected, this will set domid and handle to one + * of them arbitrarily. The main use for this function is to wait until a single + * frontend connection has occured. + * returns 0 on success, non-zero on failure */ +int tpmback_wait_for_frontend_connect(domid_t *domid, unsigned int *handle); + +/* returns the number of frontends connected */ +int tpmback_num_frontends(void); + +/* Returns the uuid of the specified frontend, NULL on error. + * The return value is internally allocated, so don't free it */ +unsigned char* tpmback_get_uuid(domid_t domid, unsigned int handle); + +/* Get and set the opaque pointer for a tpmback instance */ +void* tpmback_get_opaque(domid_t domid, unsigned int handle); +/* Returns zero if successful, nonzero on failure (no such frontend) */ +int tpmback_set_opaque(domid_t domid, unsigned int handle, void* opaque); + +/* Get the XSM context of the given domain (using the tpmback event channel) */ +int tpmback_get_peercontext(domid_t domid, unsigned int handle, void* buffer, int buflen); +#endif diff -Nru xen-4.9.0/extras/mini-os/include/tpmfront.h xen-4.9.2/extras/mini-os/include/tpmfront.h --- xen-4.9.0/extras/mini-os/include/tpmfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/tpmfront.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/char/tpm_vtpm.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2006 IBM Corporation + * + * This code has also been derived from drivers/char/tpm_xen.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netfront/netfront.c + * from the linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
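Putting the tpmback API above together, a minimal backend service loop looks like the sketch below. Only the init/req/resp/shutdown calls come from the header; the include path and handle_tpm_cmd() are assumptions for illustration:

    #include <stdint.h>
    #include <stdlib.h>
    #include <mini-os/tpmback.h>   /* include path assumed */

    /* Hypothetical worker that builds a response and returns its length. */
    extern uint8_t *handle_tpm_cmd(uint8_t *req, unsigned int req_len,
                                   unsigned int *resp_len);

    static void frontend_opened(domid_t dom, unsigned int handle) { }
    static void frontend_closed(domid_t dom, unsigned int handle) { }

    void tpm_backend_loop(void)
    {
        tpmcmd_t *cmd;

        init_tpmback(frontend_opened, frontend_closed);

        /* tpmback_req_any() blocks; NULL signals shutdown or an error. */
        while ((cmd = tpmback_req_any()) != NULL) {
            /* cmd->req belongs to the driver: don't free it. */
            uint8_t *resp = handle_tpm_cmd(cmd->req, cmd->req_len,
                                           &cmd->resp_len);
            cmd->resp = resp;
            tpmback_resp(cmd);   /* frees the tpmcmd, not the resp buffer */
            free(resp);          /* the response buffer is ours to free */
        }

        shutdown_tpmback();
    }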
+ */ +#ifndef TPMFRONT_H +#define TPMFRONT_H + +#include +#include +#include +#include +#include +#include +#include + +struct tpmfront_dev { + grant_ref_t ring_ref; + evtchn_port_t evtchn; + + tpmif_shared_page_t *page; + + domid_t bedomid; + char* nodename; + char* bepath; + + XenbusState state; + + uint8_t waiting; + struct wait_queue_head waitq; + + uint8_t* respbuf; + size_t resplen; + +#ifdef HAVE_LIBC + int fd; +#endif + +}; + + +/*Initialize frontend */ +struct tpmfront_dev* init_tpmfront(const char* nodename); +/*Shutdown frontend */ +void shutdown_tpmfront(struct tpmfront_dev* dev); + +/* Send a tpm command to the backend and wait for the response + * + * @dev - frontend device + * @req - request buffer + * @reqlen - length of request buffer + * @resp - *resp will be set to internal response buffer, don't free it! Value is undefined on error + * @resplen - *resplen will be set to the length of the response. Value is undefined on error + * + * returns 0 on success, non zero on failure. + * */ +int tpmfront_cmd(struct tpmfront_dev* dev, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen); + +/* Set the locality used for communicating with a vTPM */ +int tpmfront_set_locality(struct tpmfront_dev* dev, int locality); + +#ifdef HAVE_LIBC +#include +/* POSIX IO functions: + * use tpmfront_open() to get a file descriptor to the tpm device + * use write() on the fd to send a command to the backend. You must + * include the entire command in a single call to write(). + * use read() on the fd to read the response. You can use + * fstat() to get the size of the response and lseek() to seek on it. + */ +int tpmfront_open(struct tpmfront_dev* dev); +int tpmfront_posix_read(int fd, uint8_t* buf, size_t count); +int tpmfront_posix_write(int fd, const uint8_t* buf, size_t count); +int tpmfront_posix_fstat(int fd, struct stat* buf); +#endif + + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/tpm_tis.h xen-4.9.2/extras/mini-os/include/tpm_tis.h --- xen-4.9.0/extras/mini-os/include/tpm_tis.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/tpm_tis.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. 
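And the frontend side: one round trip through the tpmfront API above. Again a sketch for a mini-os build tree; the include path is an assumption, and passing NULL to init_tpmfront() to mean "default xenstore node" is an assumption as well:

    #include <stdint.h>
    #include <stddef.h>
    #include <mini-os/tpmfront.h>   /* include path assumed */

    int probe_vtpm(void)
    {
        uint8_t req[16] = { 0 };   /* a real TPM request blob would go here */
        uint8_t *resp;             /* driver-owned buffer: do not free */
        size_t resplen;

        struct tpmfront_dev *dev = init_tpmfront(NULL); /* NULL: assumed default node */
        if (!dev)
            return -1;

        if (tpmfront_cmd(dev, req, sizeof(req), &resp, &resplen)) {
            shutdown_tpmfront(dev);
            return -1;
        }
        /* resp[0 .. resplen-1] holds the backend's answer. */
        shutdown_tpmfront(dev);
        return 0;
    }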
+ * + * This code has been derived from drivers/char/tpm.c + * from the linux kernel + * + * Copyright (C) 2004 IBM Corporation + * + * This code has also been derived from drivers/char/tpm/tpm_tis.c + * from the linux kernel + * + * Copyright (C) 2005, 2006 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#ifndef TPM_TIS_H +#define TPM_TIS_H + +#include +#include + +#define TPM_TIS_EN_LOCL0 1 +#define TPM_TIS_EN_LOCL1 (1 << 1) +#define TPM_TIS_EN_LOCL2 (1 << 2) +#define TPM_TIS_EN_LOCL3 (1 << 3) +#define TPM_TIS_EN_LOCL4 (1 << 4) +#define TPM_TIS_EN_LOCLALL (TPM_TIS_EN_LOCL0 | TPM_TIS_EN_LOCL1 | TPM_TIS_EN_LOCL2 | TPM_TIS_EN_LOCL3 | TPM_TIS_EN_LOCL4) +#define TPM_TIS_LOCL_INT_TO_FLAG(x) (1 << x) +#define TPM_BASEADDR 0xFED40000 +#define TPM_PROBE_IRQ 0xFFFF + +struct tpm_chip; + +struct tpm_chip* init_tpm_tis(unsigned long baseaddr, int localities, unsigned int irq); +struct tpm_chip* init_tpm2_tis(unsigned long baseaddr, int localities, unsigned int irq); +void shutdown_tpm_tis(struct tpm_chip* tpm); + +int tpm_tis_request_locality(struct tpm_chip* tpm, int locality); +int tpm_tis_cmd(struct tpm_chip* tpm, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen); + +#ifdef HAVE_LIBC +#include +#include +/* POSIX IO functions: + * use tpm_tis_open() to get a file descriptor to the tpm device + * use write() on the fd to send a command to the backend. You must + * include the entire command in a single call to write(). + * use read() on the fd to read the response. You can use + * fstat() to get the size of the response and lseek() to seek on it. + */ +int tpm_tis_open(struct tpm_chip* tpm); +int tpm_tis_posix_read(int fd, uint8_t* buf, size_t count); +int tpm_tis_posix_write(int fd, const uint8_t* buf, size_t count); +int tpm_tis_posix_fstat(int fd, struct stat* buf); +#endif + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/types.h xen-4.9.2/extras/mini-os/include/types.h --- xen-4.9.0/extras/mini-os/include/types.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/types.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,74 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: types.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: May 2003 + * + * Environment: Xen Minimal OS + * Description: a random collection of type definitions + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _TYPES_H_ +#define _TYPES_H_ +#include + +/* FreeBSD compat types */ +#ifndef HAVE_LIBC +typedef unsigned char u_char; +typedef unsigned int u_int; +typedef unsigned long u_long; +#endif +#if defined(__i386__) || defined(__arm__) +typedef long long quad_t; +typedef unsigned long long u_quad_t; +#elif defined(__x86_64__) +typedef long quad_t; +typedef unsigned long u_quad_t; +#endif /* __i386__ || __x86_64__ */ + +#ifdef HAVE_LIBC +#include +#include +#else +#if defined(__i386__) || defined(__arm__) +typedef unsigned int uintptr_t; +typedef int intptr_t; +#elif 
defined(__x86_64__) || defined(__aarch64__) +typedef unsigned long uintptr_t; +typedef long intptr_t; +#endif /* __i386__ || __x86_64__ */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +#if defined(__i386__) || defined(__arm__) +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#elif defined(__x86_64__) || defined(__aarch64__) +typedef signed long int64_t; +typedef unsigned long uint64_t; +#endif +typedef uint64_t uintmax_t; +typedef int64_t intmax_t; +typedef int64_t off_t; +#endif + +typedef intptr_t ptrdiff_t; + + +#ifndef HAVE_LIBC +typedef long ssize_t; +#endif + +#endif /* _TYPES_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/wait.h xen-4.9.2/extras/mini-os/include/wait.h --- xen-4.9.0/extras/mini-os/include/wait.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/wait.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,105 @@ +#ifndef __WAIT_H__ +#define __WAIT_H__ + +#include +#include +#include + +#define DEFINE_WAIT(name) \ +struct wait_queue name = { \ + .thread = get_current(), \ + .waiting = 0, \ +} + + +static inline void init_waitqueue_head(struct wait_queue_head *h) +{ + MINIOS_STAILQ_INIT(h); +} + +static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread) +{ + q->thread = thread; + q->waiting = 0; +} + +static inline void add_wait_queue(struct wait_queue_head *h, struct wait_queue *q) +{ + if (!q->waiting) { + MINIOS_STAILQ_INSERT_HEAD(h, q, thread_list); + q->waiting = 1; + } +} + +static inline void remove_wait_queue(struct wait_queue_head *h, struct wait_queue *q) +{ + if (q->waiting) { + MINIOS_STAILQ_REMOVE(h, q, struct wait_queue, thread_list); + q->waiting = 0; + } +} + +static inline void wake_up(struct wait_queue_head *head) +{ + unsigned long flags; + struct wait_queue *curr, *tmp; + local_irq_save(flags); + MINIOS_STAILQ_FOREACH_SAFE(curr, head, thread_list, tmp) + wake(curr->thread); + local_irq_restore(flags); +} + +#define add_waiter(w, wq) do { \ + unsigned long flags; \ + local_irq_save(flags); \ + add_wait_queue(&wq, &w); \ + block(get_current()); \ + local_irq_restore(flags); \ +} while (0) + +#define remove_waiter(w, wq) do { \ + unsigned long flags; \ + local_irq_save(flags); \ + remove_wait_queue(&wq, &w); \ + local_irq_restore(flags); \ +} while (0) + +#define wait_event_deadline(wq, condition, deadline) do { \ + unsigned long flags; \ + DEFINE_WAIT(__wait); \ + if(condition) \ + break; \ + for(;;) \ + { \ + /* protect the list */ \ + local_irq_save(flags); \ + add_wait_queue(&wq, &__wait); \ + get_current()->wakeup_time = deadline; \ + clear_runnable(get_current()); \ + local_irq_restore(flags); \ + if((condition) || (deadline && NOW() >= deadline)) \ + break; \ + schedule(); \ + } \ + local_irq_save(flags); \ + /* need to wake up */ \ + wake(get_current()); \ + remove_wait_queue(&wq, &__wait); \ + local_irq_restore(flags); \ +} while(0) + +#define wait_event(wq, condition) wait_event_deadline(wq, condition, 0) + + + +#endif /* __WAIT_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/waittypes.h xen-4.9.2/extras/mini-os/include/waittypes.h --- xen-4.9.0/extras/mini-os/include/waittypes.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/waittypes.h 2017-02-22 13:09:16.000000000 
+0000 @@ -0,0 +1,32 @@ +#ifndef __WAITTYPE_H__ +#define __WAITTYPE_H__ + +#include + +struct thread; +struct wait_queue +{ + int waiting; + struct thread *thread; + MINIOS_STAILQ_ENTRY(struct wait_queue) thread_list; +}; + +/* TODO - lock required? */ +MINIOS_STAILQ_HEAD(wait_queue_head, struct wait_queue); + +#define DECLARE_WAIT_QUEUE_HEAD(name) \ + struct wait_queue_head name = MINIOS_STAILQ_HEAD_INITIALIZER(name) + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) MINIOS_STAILQ_HEAD_INITIALIZER(name) + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/arch_endian.h xen-4.9.2/extras/mini-os/include/x86/arch_endian.h --- xen-4.9.0/extras/mini-os/include/x86/arch_endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/arch_endian.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef ARCH_ENDIAN_H +#error "Do not include arch_endian by itself, include endian.h" +#else + +#define __BYTE_ORDER __LITTLE_ENDIAN + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/x86/arch_limits.h xen-4.9.2/extras/mini-os/include/x86/arch_limits.h --- xen-4.9.0/extras/mini-os/include/x86/arch_limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/arch_limits.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,20 @@ + +#ifndef __ARCH_LIMITS_H__ +#define __ARCH_LIMITS_H__ + +#define __PAGE_SHIFT 12 + +#ifdef __ASSEMBLY__ +#define __PAGE_SIZE (1 << __PAGE_SHIFT) +#else +#ifdef __x86_64__ +#define __PAGE_SIZE (1UL << __PAGE_SHIFT) +#else +#define __PAGE_SIZE (1ULL << __PAGE_SHIFT) +#endif +#endif + +#define __STACK_SIZE_PAGE_ORDER 4 +#define __STACK_SIZE (__PAGE_SIZE * (1 << __STACK_SIZE_PAGE_ORDER)) + +#endif /* __ARCH_LIMITS_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/arch_mm.h xen-4.9.2/extras/mini-os/include/x86/arch_mm.h --- xen-4.9.0/extras/mini-os/include/x86/arch_mm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/arch_mm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,288 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
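Back to the wait.h primitives above: the intended pattern is the classic sleep/wakeup pair, with the condition re-checked after every wakeup. A sketch assuming a mini-os build tree and an assumed include path:

    #include <mini-os/wait.h>   /* include path assumed */

    static DECLARE_WAIT_QUEUE_HEAD(data_wq);
    static volatile int data_ready;

    /* Runs as a mini-os thread (see create_thread() in sched.h above). */
    void consumer(void *unused)
    {
        /* Sleeps until data_ready is set; the condition is re-tested
         * after each wakeup, so spurious wake() calls are harmless. */
        wait_event(data_wq, data_ready);
        /* ... consume the data ... */
    }

    void producer(void)
    {
        data_ready = 1;
        wake_up(&data_wq);   /* wakes every thread queued on data_wq */
    }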
+ */ + +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +#ifndef __ASSEMBLY__ +#include +#if defined(__i386__) +#include +#define __CONST(x) x ## ULL +#elif defined(__x86_64__) +#include +#define __CONST(x) x ## UL +#else +#error "Unsupported architecture" +#endif +#define CONST(x) __CONST(x) +#else +#define CONST(x) x +#endif + +/* + * Physical address space usage: + * + * 0..._edata: kernel text/data + * *stack : kernel stack (thread 0) + * hypervisor allocated data: p2m_list, start_info page, xenstore page, + * console page, initial page tables + * bitmap of allocated pages + * pages controlled by the page allocator + * + * + * Virtual address space usage: + * + * area x86-64 x86-32 + * ------------------------------------------------------------ + * mapped physical memory 00000000 00000000 + * kernel virtual mappings 8000000000 3f000000 + * demand mappings 100000000000 40000000 + * heap (with libc only) 200000000000 b0000000 + * + */ + +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 + +#define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PAGETABLE_LEVELS 3 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +#define PRIpte "016llx" +#ifndef __ASSEMBLY__ +typedef uint64_t pgentry_t; +#else +#define PTE(val) .long val; .long 0 +#endif + +#define MAX_MEM_SIZE CONST(0x3f000000) +#define VIRT_KERNEL_AREA CONST(0x3f000000) +#define VIRT_DEMAND_AREA CONST(0x40000000) +#define VIRT_HEAP_AREA CONST(0xb0000000) + +#define DEMAND_MAP_PAGES CONST(0x6ffff) +#define HEAP_PAGES_MAX ((HYPERVISOR_VIRT_START - VIRT_HEAP_AREA) / \ + PAGE_SIZE - 1) + +#elif defined(__x86_64__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +#define PAGETABLE_LEVELS 4 + +/* These are page-table limitations. Current CPUs support only 40-bit phys. */ +#define PADDR_BITS 52 +#define VADDR_BITS 48 +#define PADDR_MASK ((1UL << PADDR_BITS)-1) +#define VADDR_MASK ((1UL << VADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) + +#define PRIpte "016lx" +#ifndef __ASSEMBLY__ +typedef unsigned long pgentry_t; +#else +#define PTE(val) .quad val +#endif + +#define MAX_MEM_SIZE (CONST(512) << 30) +#define VIRT_KERNEL_AREA CONST(0x0000008000000000) +#define VIRT_DEMAND_AREA CONST(0x0000100000000000) +#define VIRT_HEAP_AREA CONST(0x0000200000000000) + +#define DEMAND_MAP_PAGES CONST(0x8000000) +#define HEAP_PAGES_MAX CONST(0x8000000) + +#endif + +#ifndef HAVE_LIBC +#define HEAP_PAGES 0 +#else +#define HEAP_PAGES HEAP_PAGES_MAX +#endif + +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) + +/* Given a virtual address, get an entry offset into a page table. 
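The l*_table_offset() macros this comment introduces (defined right below) are plain shift-and-mask arithmetic. A standalone check for an x86_64 address, with the shifts and entry counts copied from this header; the address is VIRT_HEAP_AREA plus one page:

    #include <stdio.h>

    #define L1_PAGETABLE_SHIFT 12
    #define L2_PAGETABLE_SHIFT 21
    #define L3_PAGETABLE_SHIFT 30
    #define L4_PAGETABLE_SHIFT 39
    #define ENTRIES 512   /* L1..L4_PAGETABLE_ENTRIES on x86_64 */

    int main(void)
    {
        unsigned long va = 0x0000200000001000UL;  /* VIRT_HEAP_AREA + 4K */

        printf("l4 = %lu\n", (va >> L4_PAGETABLE_SHIFT) & (ENTRIES - 1)); /* 64 */
        printf("l3 = %lu\n", (va >> L3_PAGETABLE_SHIFT) & (ENTRIES - 1)); /* 0 */
        printf("l2 = %lu\n", (va >> L2_PAGETABLE_SHIFT) & (ENTRIES - 1)); /* 0 */
        printf("l1 = %lu\n", (va >> L1_PAGETABLE_SHIFT) & (ENTRIES - 1)); /* 1 */
        return 0;
    }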
*/ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#if defined(__x86_64__) +#define l4_table_offset(_a) \ + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif + +#define _PAGE_PRESENT CONST(0x001) +#define _PAGE_RW CONST(0x002) +#define _PAGE_USER CONST(0x004) +#define _PAGE_PWT CONST(0x008) +#define _PAGE_PCD CONST(0x010) +#define _PAGE_ACCESSED CONST(0x020) +#define _PAGE_DIRTY CONST(0x040) +#define _PAGE_PAT CONST(0x080) +#define _PAGE_PSE CONST(0x080) +#define _PAGE_GLOBAL CONST(0x100) + +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT) +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif /* __i386__ || __x86_64__ */ + +/* flags for ioremap */ +#define IO_PROT (L1_PROT) +#define IO_PROT_NOCACHE (L1_PROT | _PAGE_PCD) + +#include "arch_limits.h" +#define PAGE_SIZE __PAGE_SIZE +#define PAGE_SHIFT __PAGE_SHIFT +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((uint64_t)(x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) ((x) >> L1_PAGETABLE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +#ifndef __ASSEMBLY__ +/* Definitions for machine and pseudophysical addresses. */ +#ifdef __i386__ +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + +extern pgentry_t *pt_base; +#ifdef CONFIG_PARAVIRT +extern unsigned long *phys_to_machine_mapping; +#else +extern pgentry_t page_table_base[]; +#endif +extern char _text, _etext, _erodata, _edata, _end; +extern unsigned long mfn_zero; +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} + +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +#define VIRT_START ((unsigned long)&_text) + +#define to_phys(x) ((unsigned long)(x)-VIRT_START) +#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) + +/* Pagetable walking. 
*/ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) + +#ifdef __x86_64__ +#define virtual_to_l3(_virt) ((pgentry_t *)pte_to_virt(pt_base[l4_table_offset(_virt)])) +#else +#define virtual_to_l3(_virt) pt_base +#endif + +#define virtual_to_l2(_virt) ({ \ + unsigned long __virt2 = (_virt); \ + (pgentry_t *) pte_to_virt(virtual_to_l3(__virt2)[l3_table_offset(__virt2)]); \ +}) + +#define virtual_to_l1(_virt) ({ \ + unsigned long __virt1 = (_virt); \ + (pgentry_t *) pte_to_virt(virtual_to_l2(__virt1)[l2_table_offset(__virt1)]); \ +}) + +#define virtual_to_pte(_virt) ({ \ + unsigned long __virt0 = (unsigned long) (_virt); \ + virtual_to_l1(__virt0)[l1_table_offset(__virt0)]; \ +}) +#define virtual_to_mfn(_virt) pte_to_mfn(virtual_to_pte(_virt)) + +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT) +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO) +#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO) + +pgentry_t *need_pgt(unsigned long addr); +void arch_mm_preinit(void *p); +unsigned long alloc_virt_kernel(unsigned n_pages); + +#ifndef CONFIG_PARAVIRT +void arch_print_memmap(void); +#endif + +#endif + +#endif /* _ARCH_MM_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/arch_sched.h xen-4.9.2/extras/mini-os/include/x86/arch_sched.h --- xen-4.9.0/extras/mini-os/include/x86/arch_sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/arch_sched.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,25 @@ + +#ifndef __ARCH_SCHED_H__ +#define __ARCH_SCHED_H__ + +#include "arch_limits.h" + +static inline struct thread* get_current(void) +{ + struct thread **current; +#ifdef __i386__ + register unsigned long sp asm("esp"); +#else + register unsigned long sp asm("rsp"); +#endif + current = (void *)(unsigned long)(sp & ~(__STACK_SIZE-1)); + return *current; +} + +extern void __arch_switch_threads(unsigned long *prevctx, unsigned long *nextctx); + +#define arch_switch_threads(prev,next) __arch_switch_threads(&(prev)->sp, &(next)->sp) + + + +#endif /* __ARCH_SCHED_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/arch_spinlock.h xen-4.9.2/extras/mini-os/include/x86/arch_spinlock.h --- xen-4.9.0/extras/mini-os/include/x86/arch_spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/arch_spinlock.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,94 @@ + + +#ifndef __ARCH_ASM_SPINLOCK_H +#define __ARCH_ASM_SPINLOCK_H + +#include +#include "os.h" + + +#define ARCH_SPIN_LOCK_UNLOCKED { 1 } + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define arch_spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\n\t" + +#define spin_lock_string_flags \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ + "jz 3f\n\t" \ + "#sti\n\t" \ + "3:\t" \ + "rep;nop\n\t" \ + "cmpb $0, %0\n\t" \ + "jle 3b\n\t" \ + "#cli\n\t" \ + "jmp 1b\n" \ + "4:\n\t" + +/* + * This works. Despite all the confusion. 
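Worth pausing on get_current() in arch_sched.h above: mini-os thread stacks are __STACK_SIZE-aligned (16 pages of 4 KiB, i.e. 64 KiB, per arch_limits.h), and the code stores a pointer to the owning struct thread at the stack base, so masking the stack pointer recovers the current thread. The masking step, as standalone arithmetic with an invented stack pointer value:

    #include <stdio.h>

    #define STACK_SIZE (16UL * 4096)   /* __STACK_SIZE on x86 */

    int main(void)
    {
        unsigned long sp   = 0x12345678;               /* pretend %esp/%rsp */
        unsigned long base = sp & ~(STACK_SIZE - 1);   /* stack base */

        printf("sp   = %#lx\n", sp);    /* 0x12345678 */
        printf("base = %#lx\n", base);  /* 0x12340000: where *current lives */
        return 0;
    }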
+ * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->slock) \ + :"0" (oldval) : "memory" + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + char oldval = ARCH_SPIN_LOCK_UNLOCKED; + __asm__ __volatile__( + spin_unlock_string + ); +} + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->slock) : : "memory"); +} + +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) +{ + __asm__ __volatile__( + spin_lock_string_flags + :"=m" (lock->slock) : "r" (flags) : "memory"); +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/x86/asm_macros.h xen-4.9.2/extras/mini-os/include/x86/asm_macros.h --- xen-4.9.0/extras/mini-os/include/x86/asm_macros.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/asm_macros.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,28 @@ +#ifndef _X86_ASM_MACRO_H_ +#define _X86_ASM_MACRO_H_ + +#ifdef __ASSEMBLY__ +# if defined(__x86_64__) +# define _WORD .quad +# elif defined(__i386__) +# define _WORD .long +# endif +#else +# if defined(__x86_64__) +# define _WORD ".quad" +# elif defined(__i386__) +# define _WORD ".long" +# endif +#endif + +#endif /* _X86_ASM_MACRO_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/desc.h xen-4.9.2/extras/mini-os/include/x86/desc.h --- xen-4.9.0/extras/mini-os/include/x86/desc.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/desc.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,367 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2016 - Juergen Gross, SUSE Linux GmbH + * based on some header files from Xen Test Framework by Andrew Cooper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DESC_H_ +#define _DESC_H_ + +/* + * Count the number of varadic arguments provided. + * + *
+ *   VA_NARGS()     => 0
+ *   VA_NARGS(x)    => 1
+ *   VA_NARGS(x, y) => 2
+ * 
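+ *
+ * Editor's note on the macros defined just below: the leading 'X'
+ * placeholder plus the caller's arguments shift the descending 11..0
+ * list so that the argument count lands in the N slot of VA_NARGS_,
+ * and the GNU ',##__VA_ARGS__' extension swallows the comma when no
+ * arguments are given, which is what makes VA_NARGS() yield 0.
+ * For example:
+ *
+ *   VA_NARGS(a, b, c) => VA_NARGS_(X, a, b, c, 11, 10, ..., 0) => 3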
+ * + * Currently works for 0 to 11 arguments. + */ +#define VA_NARGS_(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N +#define VA_NARGS(...) \ + VA_NARGS_(X,##__VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* + * Call a macro variation, based on the number of variadic arguments. + * + * @param macro Partial token to call a variation of. + * @param c1 Constant parameter to pass through. + * @param ... Variadic arguments to pass through. + * + * Tokenises 'macro' with the count of variadic arguments, passing 'c1' and the + * variadic arguments. + * + *
+ *   VAR_MACRO_C1(m, c)          => m0(c)
+ *   VAR_MACRO_C1(m, c, x)       => m1(c, x)
+ *   VAR_MACRO_C1(m, c, x, y)    => m2(c, x, y)
+ *   VAR_MACRO_C1(m, c, x, y, z) => m3(c, x, y, z)
+ * 
+ */ +#define VAR_MACRO_C1__(macro, c1, count, ...) macro##count(c1, ##__VA_ARGS__) +#define VAR_MACRO_C1_(macro, c1, count, ...) \ + VAR_MACRO_C1__(macro, c1, count, ##__VA_ARGS__) +#define VAR_MACRO_C1(macro, c1, ...) \ + VAR_MACRO_C1_(macro, c1, VA_NARGS(__VA_ARGS__), ##__VA_ARGS__) + +/* + * GDT layout: + * + * 0 - null + * 1 - 64bit supervisor code + * 2 - 32bit supervisor code + * 3 - 32bit supervisor data + * 4 - 64bit userspace code + * 5 - 32bit userspace code + * 6 - 32bit userspace data + * 7 - TSS (two slots in long mode) + * + * 9-12 - Available for test use + */ + +#define GDTE_CS64_DPL0 1 +#define GDTE_CS32_DPL0 2 +#define GDTE_DS32_DPL0 3 +#define GDTE_CS64_DPL3 4 +#define GDTE_CS32_DPL3 5 +#define GDTE_DS32_DPL3 6 + +#define GDTE_TSS 7 + +#define GDTE_AVAIL0 9 +#define GDTE_AVAIL1 10 +#define GDTE_AVAIL2 11 +#define GDTE_AVAIL3 12 + +#define NR_GDT_ENTRIES 13 + +#ifdef __x86_64__ + +#define __KERN_CS (GDTE_CS64_DPL0 * 8) +#define __KERN_DS (0) +#define __USER_CS (GDTE_CS64_DPL3 * 8 + 3) +#define __USER_DS (GDTE_DS32_DPL3 * 8 + 3) + +#else /* __x86_64__ */ + +#define __KERN_CS (GDTE_CS32_DPL0 * 8) +#define __KERN_DS (GDTE_DS32_DPL0 * 8) +#define __USER_CS (GDTE_CS32_DPL3 * 8 + 3) +#define __USER_DS (GDTE_DS32_DPL3 * 8 + 3) + +#endif /* __x86_64__ */ + +#ifndef __ASSEMBLY__ +/* 8 byte user segment descriptor (GDT/LDT entries with .s = 1) */ +struct __packed seg_desc32 { + union { + /* Raw backing integers. */ + struct { + uint32_t lo, hi; + }; + /* Common named fields. */ + struct { + uint16_t limit0; + uint16_t base0; + uint8_t base1; + unsigned type: 4; + unsigned s: 1, dpl: 2, p: 1; + unsigned limit: 4; + unsigned avl: 1, l: 1, d: 1, g: 1; + uint8_t base2; + }; + /* Code segment specific field names. */ + struct { + uint16_t limit0; + uint16_t base0; + uint8_t base1; + unsigned a: 1, r: 1, c: 1, x: 1; + unsigned s: 1, dpl: 2, p: 1; + unsigned limit: 4; + unsigned avl: 1, l: 1, d: 1, g: 1; + uint8_t base2; + } code; + /* Data segment specific field names. */ + struct { + uint16_t limit0; + uint16_t base0; + uint8_t base1; + unsigned a: 1, w: 1, e: 1, x: 1; + unsigned s: 1, dpl: 2, p: 1; + unsigned limit: 4; + unsigned avl: 1, _r0: 1, b: 1, g: 1; + uint8_t base2; + } data; + }; +}; + +/* 8-byte gate - Protected mode IDT entry, GDT task/call gate. */ +struct __packed seg_gate32 { + union { + struct { + uint32_t lo, hi; + }; + struct { + uint16_t offset0; + uint16_t selector; + uint8_t _r0; + unsigned type: 4, s: 1, dpl: 2, p: 1; + uint16_t offset1; + }; + }; +}; + +/* 16-byte gate - Long mode IDT entry. */ +struct __packed seg_gate64 { + union { + struct { + uint64_t lo, hi; + }; + struct { + uint16_t offset0; + uint16_t selector; + unsigned ist: 3, _r0: 5, type: 4, s: 1, dpl: 2, p: 1; + uint16_t offset1; + uint32_t offset2; + uint32_t _r1; + }; + }; +}; + +/* GDT/LDT attribute flags for user segments */ + +/* Common */ +#define SEG_ATTR_G 0x8000 /* Granularity of limit (0 = 1, 1 = 4K) */ +#define SEG_ATTR_AVL 0x1000 /* Available for software use */ +#define SEG_ATTR_P 0x0080 /* Present? */ +#define SEG_ATTR_S 0x0010 /* !System desc (0 = system, 1 = user) */ +#define SEG_ATTR_A 0x0001 /* Accessed? 
(set by hardware) */ + +#define SEG_ATTR_COMMON 0x8091 /* Commonly set bits (G P S A) */ + +#define SEG_ATTR_DPL0 0x0000 /* Descriptor privilege level 0 */ +#define SEG_ATTR_DPL1 0x0020 /* Descriptor privilege level 1 */ +#define SEG_ATTR_DPL2 0x0040 /* Descriptor privilege level 2 */ +#define SEG_ATTR_DPL3 0x0060 /* Descriptor privilege level 3 */ +#define SEG_ATTR_CODE 0x0008 /* Type (0 = data, 1 = code) */ +#define SEG_ATTR_DATA 0x0000 /* Type (0 = data, 1 = code) */ + +/* Code segments */ +#define SEG_ATTR_D 0x4000 /* Default operand size (0 = 16bit, 1 = 32bit) */ +#define SEG_ATTR_L 0x2000 /* Long segment? (1 = 64bit) */ +#define SEG_ATTR_C 0x0004 /* Conforming? (0 = non, 1 = conforming) */ +#define SEG_ATTR_R 0x0002 /* Readable? (0 = XO seg, 1 = RX seg) */ + +/* Data segments */ +#define SEG_ATTR_B 0x4000 /* 'Big' flag. + * - For %ss, default operand size. + * - For expand-down segment, sets upper bound. */ +#define SEG_ATTR_E 0x0004 /* Expand-down? (0 = normal, 1 = expand-down) */ +#define SEG_ATTR_W 0x0002 /* Writable? (0 = RO seg, 1 = RW seg) */ + +/* + * Initialise an LDT/GDT entry using a raw attribute number. + * + * @param base Segment base. + * @param limit Segment limit. + * @param attr Segment attributes. + */ +#define INIT_GDTE(base, limit, attr) { { { \ + .lo = (((base) & 0xffff) << 16) | ((limit) & 0xffff), \ + .hi = ((base) & 0xff000000) | ((limit) & 0xf0000) | \ + (((attr) & 0xf0ff) << 8) | (((base) & 0xff0000) >> 16) \ + } } } + +/* + * Tokenise and OR together. + * + * For each variadic parameter, tokenise with 't' and OR together. + * + * @param t Common stem partial token. + * @param ... Partial tokens. + * + * Example: + *
+ *   TOK_OR(t, x, y)    => (t ## x | t ## y)
+ *   TOK_OR(t, x, y, z) => (t ## x | t ## y | t ## z)
+ * 
+ */ +#define TOK_OR0(t) (0) +#define TOK_OR1(t, x) (t ## x) +#define TOK_OR2(t, x, ...) (t ## x | TOK_OR1(t, ##__VA_ARGS__)) +#define TOK_OR3(t, x, ...) (t ## x | TOK_OR2(t, ##__VA_ARGS__)) +#define TOK_OR4(t, x, ...) (t ## x | TOK_OR3(t, ##__VA_ARGS__)) +#define TOK_OR5(t, x, ...) (t ## x | TOK_OR4(t, ##__VA_ARGS__)) +#define TOK_OR6(t, x, ...) (t ## x | TOK_OR5(t, ##__VA_ARGS__)) +#define TOK_OR7(t, x, ...) (t ## x | TOK_OR6(t, ##__VA_ARGS__)) +#define TOK_OR8(t, x, ...) (t ## x | TOK_OR7(t, ##__VA_ARGS__)) +#define TOK_OR(t, ...) VAR_MACRO_C1(TOK_OR, t, ##__VA_ARGS__) + +/* + * Initialise an LDT/GDT entry using SEG_ATTR_ mnemonics. + * + * @param base Segment base. + * @param limit Segment limit. + * @param ... Partial SEG_ATTR_ tokens for attributes. + * + * Example usage: + * - INIT_GDTE_SYM(0, 0xfffff, P) + * - uses @ref SEG_ATTR_P + * + * - INIT_GDTE_SYM(0, 0xfffff, CODE, L) + * - uses @ref SEG_ATTR_CODE and @ref SEG_ATTR_L + */ +#define INIT_GDTE_SYM(base, limit, ...) \ + INIT_GDTE(base, limit, TOK_OR(SEG_ATTR_, ##__VA_ARGS__)) + +/* Long mode lgdt/lidt table pointer. */ +struct __packed desc_ptr64 { + uint16_t limit; + uint64_t base; +}; + +/* Protected mode lgdt/lidt table pointer. */ +struct __packed desc_ptr32 { + uint16_t limit; + uint32_t base; +}; + +struct __packed hw_tss32 { + uint16_t link; uint16_t _r0; + + uint32_t esp0; + uint16_t ss0; uint16_t _r1; + + uint32_t esp1; + uint16_t ss1; uint16_t _r2; + + uint32_t esp2; + uint16_t ss2; uint16_t _r3; + + uint32_t cr3; + uint32_t eip; + uint32_t eflags; + uint32_t eax; + uint32_t ecx; + uint32_t edx; + uint32_t ebx; + uint32_t esp; + uint32_t ebp; + uint32_t esi; + uint32_t edi; + + uint16_t es; uint16_t _r4; + uint16_t cs; uint16_t _r5; + uint16_t ss; uint16_t _r6; + uint16_t ds; uint16_t _r7; + uint16_t fs; uint16_t _r8; + uint16_t gs; uint16_t _r9; + uint16_t ldtr; uint16_t _r10; + uint16_t t; uint16_t iopb; +}; + +struct __packed hw_tss64 { + uint16_t link; uint16_t _r0; + + uint64_t rsp0; + uint64_t rsp1; + uint64_t rsp2; + + uint64_t _r1; + + uint64_t ist[7]; /* 1-based structure */ + + uint64_t _r2; + + uint16_t t; + uint16_t iopb; +}; + +#define X86_TSS_INVALID_IO_BITMAP 0x8000 + +#if defined(__x86_64__) + +typedef struct desc_ptr64 desc_ptr; +typedef struct seg_desc32 user_desc; +typedef struct seg_gate64 gate_desc; +typedef struct hw_tss64 hw_tss; + +#elif defined(__i386__) + +typedef struct desc_ptr32 desc_ptr; +typedef struct seg_desc32 user_desc; +typedef struct seg_gate32 gate_desc; +typedef struct hw_tss32 hw_tss; + +#endif + +extern user_desc gdt[NR_GDT_ENTRIES]; +extern desc_ptr gdt_ptr; + +extern gate_desc idt[256]; +extern desc_ptr idt_ptr; + +extern hw_tss tss; + +#endif + +#endif /* _DESC_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/os.h xen-4.9.2/extras/mini-os/include/x86/os.h --- xen-4.9.0/extras/mini-os/include/x86/os.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/os.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,676 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definition + */ + +#ifndef _OS_H_ +#define _OS_H_ + +#define smp_processor_id() 0 + + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include + +#define USED __attribute__ ((used)) + +#define BUG do_exit + +#endif +#include + +#define MSR_EFER 0xc0000080 +#define _EFER_LME 8 /* Long mode enable */ + +#define X86_CR0_PG 0x80000000 /* Paging */ +#define X86_CR4_PAE 0x00000020 /* enable physical 
address extensions */ +#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ + +#define X86_EFLAGS_IF 0x00000200 + +#define __KERNEL_CS FLAT_KERNEL_CS +#define __KERNEL_DS FLAT_KERNEL_DS +#define __KERNEL_SS FLAT_KERNEL_SS + +#define TRAP_divide_error 0 +#define TRAP_debug 1 +#define TRAP_nmi 2 +#define TRAP_int3 3 +#define TRAP_overflow 4 +#define TRAP_bounds 5 +#define TRAP_invalid_op 6 +#define TRAP_no_device 7 +#define TRAP_double_fault 8 +#define TRAP_copro_seg 9 +#define TRAP_invalid_tss 10 +#define TRAP_no_segment 11 +#define TRAP_stack_error 12 +#define TRAP_gp_fault 13 +#define TRAP_page_fault 14 +#define TRAP_spurious_int 15 +#define TRAP_copro_error 16 +#define TRAP_alignment_check 17 +#define TRAP_machine_check 18 +#define TRAP_simd_error 19 +#define TRAP_deferred_nmi 31 +#define TRAP_xen_callback 32 + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +extern shared_info_t *HYPERVISOR_shared_info; + +void trap_init(void); +void trap_fini(void); + +void arch_fini(void); + + + +#ifdef CONFIG_PARAVIRT + +/* + * The use of 'barrier' in the following reflects their use as local-lock + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following + * critical operations are executed. All critical operations must complete + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also + * includes these barriers, for example. + */ + +#define __cli() \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + _vcpu->evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __save_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + (x) = _vcpu->evtchn_upcall_mask; \ +} while (0) + +#define __restore_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + }\ +} while (0) + +#define safe_halt() ((void)0) + +#define __save_and_cli(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + (x) = _vcpu->evtchn_upcall_mask; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define irqs_disabled() \ + HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask + +#else + +#if defined(__i386__) +#define __SZ "l" +#define __REG "e" +#else +#define __SZ "q" +#define __REG "r" +#endif + +#define __cli() asm volatile ( "cli" : : : "memory" ) +#define __sti() asm volatile ( "sti" : : : "memory" ) + +#define __save_flags(x) \ +do { \ + unsigned long __f; \ + asm volatile ( "pushf" __SZ " ; pop" __SZ " %0" : "=g" (__f)); \ + x = (__f & X86_EFLAGS_IF) ? 
1 : 0; \ +} while (0) + +#define __restore_flags(x) \ +do { \ + if (x) __sti(); \ + else __cli(); \ +} while (0) + +#define __save_and_cli(x) \ +do { \ + __save_flags(x); \ + __cli(); \ +} while (0) + +static inline int irqs_disabled(void) +{ + int flag; + + __save_flags(flag); + return !flag; +} + +#endif + +#ifdef __INSIDE_MINIOS__ +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_restore(x) __restore_flags(x) +#define local_save_flags(x) __save_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() +#else +unsigned long __local_irq_save(void); +void __local_irq_restore(unsigned long flags); +unsigned long __local_save_flags(void); +void __local_irq_disable(void); +void __local_irq_enable(void); +#define local_irq_save(x) x = __local_irq_save() +#define local_irq_restore(x) __local_irq_restore(x) +#define local_save_flags(x) x = __local_save_flags() +#define local_irq_disable() __local_irq_disable() +#define local_irq_enable() __local_irq_enable() +#endif + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define wmb() __asm__ __volatile__ ("": : :"memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ("mfence":::"memory") +#define rmb() __asm__ __volatile__ ("lfence":::"memory") +#define wmb() __asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */ +#endif + + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + +static inline void write_cr3(unsigned long cr3) +{ + asm volatile( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); +} + +static inline void invlpg(unsigned long va) +{ + asm volatile ( "invlpg %0": : "m" (*(const char *)(va)) : "memory" ); +} + +/************************** i386 *******************************/ +#ifdef __INSIDE_MINIOS__ +#if defined (__i386__) + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It can be reordered on architectures other than x86. + * It also implies a memory barrier. 
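+ *
+ * Editor's sketch of typical use ('pending' and 'do_work' are
+ * hypothetical):
+ *
+ *   static unsigned long pending = 1UL;     // bit 0 initially set
+ *   if ( test_and_clear_bit(0, &pending) )
+ *       do_work();   // taken on the first call only; bit 0 is now 0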
+ */ +static inline int test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0; +} + +static inline int variable_test_bit(int nr, const volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non-x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
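+ *
+ * Editor's note: the index may therefore span words; set_bit(33, addr)
+ * sets bit 1 of the 32-bit word at addr[1].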
+ */ +#define ADDR (*(volatile long *) addr) + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + + +#elif defined(__x86_64__)/* ifdef __i386__ */ +/************************** x86_64 *******************************/ + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +#define __xg(x) ((volatile long *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %k0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 8: + __asm__ __volatile__("xchgq %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile const void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"dIr" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"dIr" (nr) : "memory"); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. 
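+ *
+ * Editor's sketch:
+ *
+ *   unsigned long mask = 0x48;                    // bits 3 and 6 set
+ *   unsigned long bit = mask ? __ffs(mask) : 0;   // bit == 3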
+ */ +static __inline__ unsigned long __ffs(unsigned long word) +{ + __asm__("bsfq %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + +#define ADDR (*(volatile long *) addr) + +#define rdtscll(val) do { \ + unsigned int __a,__d; \ + asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ + (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ +} while(0) + +#else /* ifdef __x86_64__ */ +#error "Unsupported architecture" +#endif + +/********************* common i386 and x86_64 ****************************/ +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#define xen_barrier() asm volatile ( "" : : : "memory") + +#endif /* ifdef __INSIDE_MINIOS */ + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +static inline void wrmsrl(unsigned msr, uint64_t val) +{ + wrmsr(msr, (uint32_t)(val & 0xffffffffULL), (uint32_t)(val >> 32)); +} + +struct __synch_xchg_dummy { unsigned long a[100]; }; +#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x)) + +#define synch_cmpxchg(ptr, old, new) \ +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ + (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +static inline unsigned long __synch_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#ifdef __x86_64__ + case 4: + __asm__ __volatile__("lock; cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#else + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#endif + } + return old; +} + + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +static inline int +HYPERVISOR_xsm_op( + struct xen_flask_op *op) +{ + return _hypercall1(int, xsm_op, op); +} + +static inline void cpuid(uint32_t leaf, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + asm volatile ("cpuid" + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (leaf)); +} + +#undef ADDR + +#ifdef CONFIG_PARAVIRT +static inline unsigned long read_cr2(void) +{ + return HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2; +} +#else +static inline unsigned long read_cr2(void) +{ + unsigned long cr2; + + asm volatile ( "mov %%cr2,%0\n\t" : "=r" (cr2) ); + return cr2; +} +#endif + +#endif /* not assembly */ +#endif /* _OS_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/traps.h xen-4.9.2/extras/mini-os/include/x86/traps.h --- xen-4.9.0/extras/mini-os/include/x86/traps.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/traps.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,78 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: traps.h + * Author: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with traps + * + **************************************************************************** + */ + +#ifndef _TRAPS_H_ +#define _TRAPS_H_ + +#ifdef __i386__ +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; +#elif __x86_64__ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non-interrupt/non-tracing syscalls only save up to here */ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ +}; + + +#endif + +void dump_regs(struct pt_regs *regs); +void stack_walk(void); + +#define TRAP_PF_PROT 0x1 +#define TRAP_PF_WRITE 0x2 +#define TRAP_PF_USER 0x4 + +#endif /* _TRAPS_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/x86_32/arch_wordsize.h xen-4.9.2/extras/mini-os/include/x86/x86_32/arch_wordsize.h --- xen-4.9.0/extras/mini-os/include/x86/x86_32/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/x86_32/arch_wordsize.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1 @@ +#define __WORDSIZE 32 diff -Nru xen-4.9.0/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h xen-4.9.2/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- xen-4.9.0/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,343 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux. 
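+ *
+ * Editor's note: the _hypercallN macros defined below dispatch through
+ * the hypercall transfer page; hypercall 'name' lives at byte offset
+ * __HYPERVISOR_name * 32. Arguments go in ebx, ecx, edx, esi and edi,
+ * and the result comes back in eax, so for example
+ * HYPERVISOR_sched_op(cmd, arg) becomes a call to the sched_op slot
+ * with cmd in ebx and arg in ecx.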
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include +#include +#include +#include + +typedef struct { unsigned long pte_low, pte_high; } pte_t; + +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ 
+}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown shutdown = { .reason = reason }; + return _hypercall2(int, sched_op, SCHEDOP_shutdown, &shutdown); +} + +static inline long +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + uint64_t ma, uint64_t desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, new_val.pte_high, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + int cmd, void *physdev_op) +{ + return _hypercall2(int, physdev_op, cmd, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall5(int, 
update_va_mapping_otherdomain, va, + new_val.pte_low, new_val.pte_high, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); +} + +static inline unsigned long +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/x86/x86_64/arch_wordsize.h xen-4.9.2/extras/mini-os/include/x86/x86_64/arch_wordsize.h --- xen-4.9.0/extras/mini-os/include/x86/x86_64/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/x86_64/arch_wordsize.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,2 @@ +#define __WORDSIZE 64 +#define __WORDSIZE_COMPAT32 1 diff -Nru xen-4.9.0/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h xen-4.9.2/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- xen-4.9.0/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,350 @@ +/****************************************************************************** + * hypercall-x86_64.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
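+ *
+ * Editor's note: the 64-bit _hypercallN macros below use the same
+ * transfer-page dispatch as the 32-bit header, but pass arguments in
+ * rdi, rsi, rdx, r10 and r8, with the result in rax. The explicit
+ * 'movq %7,%%r10' and 'movq %8,%%r8' steps exist because GCC inline
+ * asm has no constraint letter that names r8..r15 directly.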
+ */ + +#ifndef __HYPERCALL_X86_64_H__ +#define __HYPERCALL_X86_64_H__ + +#include +#include +#include + +typedef struct { unsigned long pte; } pte_t; + +#define __pte(x) ((pte_t) { (x) } ) + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)) \ + : "memory", "r10" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; movq %8,%%r8; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)), \ + "g" ((long)(a5)) \ + : "memory", "r10", "r8" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + 
struct sched_shutdown shutdown = { .reason = reason }; + return _hypercall2(int, sched_op, SCHEDOP_shutdown, &shutdown); +} + +static inline long +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + return _hypercall1(long, set_timer_op, timeout); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + int cmd, void *physdev_op) +{ + return _hypercall2(int, physdev_op, cmd, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_set_segment_base( + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); +} + +static inline unsigned long +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + +#endif /* __HYPERCALL_X86_64_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-arm/hvm/save.h xen-4.9.2/extras/mini-os/include/xen/arch-arm/hvm/save.h --- xen-4.9.0/extras/mini-os/include/xen/arch-arm/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ 
xen-4.9.2/extras/mini-os/include/xen/arch-arm/hvm/save.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,39 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2012 Citrix Systems Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__ +#define __XEN_PUBLIC_HVM_SAVE_ARM_H__ + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-arm.h xen-4.9.2/extras/mini-os/include/xen/arch-arm.h --- xen-4.9.0/extras/mini-os/include/xen/arch-arm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-arm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,432 @@ +/****************************************************************************** + * arch-arm.h + * + * Guest OS interface to ARM Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright 2011 (C) Citrix Systems + */ + +#ifndef __XEN_PUBLIC_ARCH_ARM_H__ +#define __XEN_PUBLIC_ARCH_ARM_H__ + +/* + * `incontents 50 arm_abi Hypercall Calling Convention + * + * A hypercall is issued using the ARM HVC instruction. + * + * A hypercall can take up to 5 arguments. 
These are passed in + * registers, the first argument in x0/r0 (for arm64/arm32 guests + * respectively irrespective of whether the underlying hypervisor is + * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2, + * the fourth in x3/r3 and the fifth in x4/r4. + * + * The hypercall number is passed in r12 (arm) or x16 (arm64). In both + * cases the relevant ARM procedure calling convention specifies this + * is an inter-procedure-call scratch register (e.g. for use in linker + * stubs). This use does not conflict with use during a hypercall. + * + * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG. + * + * The return value is in x0/r0. + * + * The hypercall will clobber x16/r12 and the argument registers used + * by that hypercall (except r0 which is the return value) i.e. in + * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a + * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3. + * + * Parameter structs passed to hypercalls are laid out according to + * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA + * EABI) and Procedure Call Standard for the ARM 64-bit Architecture + * (AAPCS64). Where there is a conflict the 64-bit standard should be + * used regardless of guest type. Structures which are passed as + * hypercall arguments are always little endian. + * + * All memory which is shared with other entities in the system + * (including the hypervisor and other guests) must reside in memory + * which is mapped as Normal Inner-cacheable. This applies to: + * - hypercall arguments passed via a pointer to guest memory. + * - memory shared via the grant table mechanism (including PV I/O + * rings etc). + * - memory shared with the hypervisor (struct shared_info, struct + * vcpu_info, the grant table, etc). + * + * Any Inner cache allocation strategy (Write-Back, Write-Through etc) + * is acceptable. There is no restriction on the Outer-cacheability. + */ + +/* + * `incontents 55 arm_hcall Supported Hypercalls + * + * Xen on ARM makes extensive use of hardware facilities and therefore + * only a subset of the potential hypercalls are required. + * + * Since ARM uses second stage paging any machine/physical addresses + * passed to hypercalls are Guest Physical Addresses (Intermediate + * Physical Addresses) unless otherwise noted. + * + * The following hypercalls (and sub operations) are supported on the + * ARM platform. Other hypercalls should be considered + * unavailable/unsupported. 
+ * + * HYPERVISOR_memory_op + * All generic sub-operations + * + * HYPERVISOR_domctl + * All generic sub-operations, with the exception of: + * * XEN_DOMCTL_irq_permission (not yet implemented) + * + * HYPERVISOR_sched_op + * All generic sub-operations, with the exception of: + * * SCHEDOP_block -- prefer wfi hardware instruction + * + * HYPERVISOR_console_io + * All generic sub-operations + * + * HYPERVISOR_xen_version + * All generic sub-operations + * + * HYPERVISOR_event_channel_op + * All generic sub-operations + * + * HYPERVISOR_physdev_op + * No sub-operations are currently supported + * + * HYPERVISOR_sysctl + * All generic sub-operations, with the exception of: + * * XEN_SYSCTL_page_offline_op + * * XEN_SYSCTL_get_pmstat + * * XEN_SYSCTL_pm_op + * + * HYPERVISOR_hvm_op + * Exactly these sub-operations are supported: + * * HVMOP_set_param + * * HVMOP_get_param + * + * HYPERVISOR_grant_table_op + * All generic sub-operations + * + * HYPERVISOR_vcpu_op + * Exactly these sub-operations are supported: + * * VCPUOP_register_vcpu_info + * * VCPUOP_register_runstate_memory_area + * + * + * Other notes on the ARM ABI: + * + * - struct start_info is not exported to ARM guests. + * + * - struct shared_info is mapped by ARM guests using the + * HYPERVISOR_memory_op sub-op XENMEM_add_to_physmap, passing + * XENMAPSPACE_shared_info as space parameter. + * + * - All the per-cpu struct vcpu_info are mapped by ARM guests using the + * HYPERVISOR_vcpu_op sub-op VCPUOP_register_vcpu_info, including cpu0 + * struct vcpu_info. + * + * - The grant table is mapped using the HYPERVISOR_memory_op sub-op + * XENMEM_add_to_physmap, passing XENMAPSPACE_grant_table as space + * parameter. The memory range specified under the Xen compatible + * hypervisor node on device tree can be used as target gpfn for the + * mapping. + * + * - Xenstore is initialized by using the two hvm_params + * HVM_PARAM_STORE_PFN and HVM_PARAM_STORE_EVTCHN. They can be read + * with the HYPERVISOR_hvm_op sub-op HVMOP_get_param. + * + * - The paravirtualized console is initialized by using the two + * hvm_params HVM_PARAM_CONSOLE_PFN and HVM_PARAM_CONSOLE_EVTCHN. They + * can be read with the HYPERVISOR_hvm_op sub-op HVMOP_get_param. + * + * - Event channel notifications are delivered using the percpu GIC + * interrupt specified under the Xen compatible hypervisor node on + * device tree. + * + * - The device tree Xen compatible node is fully described under Linux + * at Documentation/devicetree/bindings/arm/xen.txt. + */ + +#define XEN_HYPERCALL_TAG 0XEA1 + +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) + +#ifndef __ASSEMBLY__ +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef union { type *p; unsigned long q; } \ + __guest_handle_ ## name; \ + typedef union { type *p; uint64_aligned_t q; } \ + __guest_handle_64_ ## name; + +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. On ARM it is always 8 bytes in size and 8 bytes + * aligned. + * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a + * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64. 
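+ *
+ * Editor's sketch of intended use ('foo', 'a' and 'p' are
+ * hypothetical):
+ *
+ *   DEFINE_XEN_GUEST_HANDLE(foo);
+ *   struct args { XEN_GUEST_HANDLE(foo) h; } a;
+ *   set_xen_guest_handle(a.h, p);  // zeroes the 64-bit q member
+ *                                  // first, then stores p, so the
+ *                                  // upper half stays well defined
+ *                                  // on 32-bit guests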
+ */ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name +#define set_xen_guest_handle_raw(hnd, val) \ + do { \ + typeof(&(hnd)) _sxghr_tmp = &(hnd); \ + _sxghr_tmp->q = 0; \ + _sxghr_tmp->p = val; \ + } while ( 0 ) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */ +# define __DECL_REG(n64, n32) union { \ + uint64_t n64; \ + uint32_t n32; \ + } +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */ +#define __DECL_REG(n64, n32) uint64_t n64 +#endif + +struct vcpu_guest_core_regs +{ + /* Aarch64 Aarch32 */ + __DECL_REG(x0, r0_usr); + __DECL_REG(x1, r1_usr); + __DECL_REG(x2, r2_usr); + __DECL_REG(x3, r3_usr); + __DECL_REG(x4, r4_usr); + __DECL_REG(x5, r5_usr); + __DECL_REG(x6, r6_usr); + __DECL_REG(x7, r7_usr); + __DECL_REG(x8, r8_usr); + __DECL_REG(x9, r9_usr); + __DECL_REG(x10, r10_usr); + __DECL_REG(x11, r11_usr); + __DECL_REG(x12, r12_usr); + + __DECL_REG(x13, sp_usr); + __DECL_REG(x14, lr_usr); + + __DECL_REG(x15, __unused_sp_hyp); + + __DECL_REG(x16, lr_irq); + __DECL_REG(x17, sp_irq); + + __DECL_REG(x18, lr_svc); + __DECL_REG(x19, sp_svc); + + __DECL_REG(x20, lr_abt); + __DECL_REG(x21, sp_abt); + + __DECL_REG(x22, lr_und); + __DECL_REG(x23, sp_und); + + __DECL_REG(x24, r8_fiq); + __DECL_REG(x25, r9_fiq); + __DECL_REG(x26, r10_fiq); + __DECL_REG(x27, r11_fiq); + __DECL_REG(x28, r12_fiq); + + __DECL_REG(x29, sp_fiq); + __DECL_REG(x30, lr_fiq); + + /* Return address and mode */ + __DECL_REG(pc64, pc32); /* ELR_EL2 */ + uint32_t cpsr; /* SPSR_EL2 */ + + union { + uint32_t spsr_el1; /* AArch64 */ + uint32_t spsr_svc; /* AArch32 */ + }; + + /* AArch32 guests only */ + uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt; + + /* AArch64 guests only */ + uint64_t sp_el0; + uint64_t sp_el1, elr_el1; +}; +typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t); + +#undef __DECL_REG + +typedef uint64_t xen_pfn_t; +#define PRI_xen_pfn PRIx64 + +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +/* Only one. 
All other VCPUS must use VCPUOP_register_vcpu_info */ +#define XEN_LEGACY_MAX_VCPUS 1 + +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong PRIx64 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +struct vcpu_guest_context { +#define _VGCF_online 0 +#define VGCF_online (1<<_VGCF_online) + uint32_t flags; /* VGCF_* */ + + struct vcpu_guest_core_regs user_regs; /* Core CPU registers */ + + uint32_t sctlr; + uint64_t ttbcr, ttbr0, ttbr1; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); +#endif + +struct arch_vcpu_info { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct arch_shared_info { +}; +typedef struct arch_shared_info arch_shared_info_t; +typedef uint64_t xen_callback_t; + +#endif + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* PSR bits (CPSR, SPSR) */ + +#define PSR_THUMB (1<<5) /* Thumb Mode enable */ +#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ +#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */ +#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */ +#define PSR_BIG_ENDIAN (1<<9) /* arm32: Big Endian Mode */ +#define PSR_DBG_MASK (1<<9) /* arm64: Debug Exception mask */ +#define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */ +#define PSR_JAZELLE (1<<24) /* Jazelle Mode */ + +/* 32 bit modes */ +#define PSR_MODE_USR 0x10 +#define PSR_MODE_FIQ 0x11 +#define PSR_MODE_IRQ 0x12 +#define PSR_MODE_SVC 0x13 +#define PSR_MODE_MON 0x16 +#define PSR_MODE_ABT 0x17 +#define PSR_MODE_HYP 0x1a +#define PSR_MODE_UND 0x1b +#define PSR_MODE_SYS 0x1f + +/* 64 bit modes */ +#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */ +#define PSR_MODE_EL3h 0x0d +#define PSR_MODE_EL3t 0x0c +#define PSR_MODE_EL2h 0x09 +#define PSR_MODE_EL2t 0x08 +#define PSR_MODE_EL1h 0x05 +#define PSR_MODE_EL1t 0x04 +#define PSR_MODE_EL0t 0x00 + +#define PSR_GUEST32_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) +#define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h) + +#define SCTLR_GUEST_INIT 0x00c50078 + +/* + * Virtual machine platform (memory layout, interrupts) + * + * These are defined for consistency between the tools and the + * hypervisor. Guests must not rely on these hardcoded values but + * should instead use the FDT. + */ + +/* Physical Address Space */ + +/* + * vGIC mappings: Only one set of mapping is used by the guest. + * Therefore they can overlap. + */ + +/* vGIC v2 mappings */ +#define GUEST_GICD_BASE 0x03001000ULL +#define GUEST_GICD_SIZE 0x00001000ULL +#define GUEST_GICC_BASE 0x03002000ULL +#define GUEST_GICC_SIZE 0x00000100ULL + +/* vGIC v3 mappings */ +#define GUEST_GICV3_GICD_BASE 0x03001000ULL +#define GUEST_GICV3_GICD_SIZE 0x00010000ULL + +#define GUEST_GICV3_RDIST_STRIDE 0x20000ULL +#define GUEST_GICV3_RDIST_REGIONS 1 + +#define GUEST_GICV3_GICR0_BASE 0x03020000ULL /* vCPU0 - vCPU7 */ +#define GUEST_GICV3_GICR0_SIZE 0x00100000ULL + +/* + * 16MB == 4096 pages reserved for guest to use as a region to map its + * grant table in. 
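+ *
+ * Illustrative sketch (editorial note, not upstream text; the field
+ * layout of struct xen_add_to_physmap from memory.h is assumed): a guest
+ * would map grant-table frame 0 into this region with the
+ * XENMEM_add_to_physmap sub-op described earlier, e.g.:
+ *
+ *     struct xen_add_to_physmap xatp = {
+ *         .domid = DOMID_SELF,
+ *         .space = XENMAPSPACE_grant_table,
+ *         .idx   = 0,                        /* frame within the table   */
+ *         .gpfn  = GUEST_GNTTAB_BASE >> 12,  /* a page inside the region */
+ *     };
+ *     HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);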
+ */
+#define GUEST_GNTTAB_BASE   0x38000000ULL
+#define GUEST_GNTTAB_SIZE   0x01000000ULL
+
+#define GUEST_MAGIC_BASE    0x39000000ULL
+#define GUEST_MAGIC_SIZE    0x01000000ULL
+
+#define GUEST_RAM_BANKS     2
+
+#define GUEST_RAM0_BASE     0x40000000ULL /* 3GB of low RAM @ 1GB */
+#define GUEST_RAM0_SIZE     0xc0000000ULL
+
+#define GUEST_RAM1_BASE     0x0200000000ULL /* 1016GB of RAM @ 8GB */
+#define GUEST_RAM1_SIZE     0xfe00000000ULL
+
+#define GUEST_RAM_BASE      GUEST_RAM0_BASE /* Lowest RAM address */
+/* Largest amount of actual RAM, not including holes */
+#define GUEST_RAM_MAX       (GUEST_RAM0_SIZE + GUEST_RAM1_SIZE)
+/* Suitable for e.g. const uint64_t ramfoo[] = GUEST_RAM_BANK_FOOS; */
+#define GUEST_RAM_BANK_BASES   { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
+#define GUEST_RAM_BANK_SIZES   { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
+
+/* Interrupts */
+#define GUEST_TIMER_VIRT_PPI    27
+#define GUEST_TIMER_PHYS_S_PPI  29
+#define GUEST_TIMER_PHYS_NS_PPI 30
+#define GUEST_EVTCHN_PPI        31
+
+/* PSCI functions */
+#define PSCI_cpu_suspend 0
+#define PSCI_cpu_off     1
+#define PSCI_cpu_on      2
+#define PSCI_migrate     3
+
+#endif
+
+#endif /*  __XEN_PUBLIC_ARCH_ARM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/cpuid.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/cpuid.h
--- xen-4.9.0/extras/mini-os/include/xen/arch-x86/cpuid.h	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/cpuid.h	2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ *    Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/*
+ * For compatibility with other hypervisor interfaces, the Xen cpuid leaves
+ * can be found at the first otherwise unused 0x100 aligned boundary starting
+ * from 0x40000000.
+ *
+ * e.g. If viridian extensions are enabled for an HVM domain, the Xen cpuid
+ * leaves will start at 0x40000100.
+ */
+
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i)    (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000x00)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ *      are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000x01) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000x02) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) + */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ + +#define XEN_CPUID_MAX_NUM_LEAVES 4 + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/hvm/save.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/hvm/save.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/hvm/save.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,630 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ +#define __XEN_PUBLIC_HVM_SAVE_X86_H__ + +/* + * Save/restore header: general info about the save file. 
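+ *
+ * Illustrative sketch (editorial note, not upstream text; hdr is assumed
+ * to point at the header record's body): consumers usually validate the
+ * header before parsing the rest of the image:
+ *
+ *     const struct hvm_save_header *hdr = first_record_body;
+ *     if ( hdr->magic != HVM_FILE_MAGIC ||
+ *          hdr->version != HVM_FILE_VERSION )
+ *         return -1;   /* not a save image this code can restore */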
+ */ + +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 0x00000001 + +struct hvm_save_header { + uint32_t magic; /* Must be HVM_FILE_MAGIC */ + uint32_t version; /* File format version */ + uint64_t changeset; /* Version of Xen that saved this file */ + uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ + uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ +}; + +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); + + +/* + * Processor + * + * Compat: Pre-3.4 didn't have msr_tsc_aux + */ + +struct hvm_hw_cpu { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. 
*/ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + uint64_t msr_tsc_aux; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +struct hvm_hw_cpu_compat { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + /*uint64_t msr_tsc_aux; COMPAT */ + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +static inline int _hvm_hw_fix_cpu(void *h) { + + union hvm_hw_cpu_union { + struct hvm_hw_cpu nat; + struct hvm_hw_cpu_compat cmp; + } *ucpu = (union hvm_hw_cpu_union *)h; + + /* If we copy from the end backwards, we should + * be able to do the modification in-place */ + ucpu->nat.error_code = ucpu->cmp.error_code; + ucpu->nat.pending_event = ucpu->cmp.pending_event; + ucpu->nat.tsc = ucpu->cmp.tsc; + ucpu->nat.msr_tsc_aux = 0; + + return 0; +} + +DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ + struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); + +/* + * PIC + */ + +struct hvm_hw_vpic { + /* IR line bitmasks. */ + uint8_t irr; + uint8_t imr; + uint8_t isr; + + /* Line IRx maps to IRQ irq_base+x */ + uint8_t irq_base; + + /* + * Where are we in ICW2-4 initialisation (0 means no init in progress)? + * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). 
+ * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) + * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) + */ + uint8_t init_state:4; + + /* IR line with highest priority. */ + uint8_t priority_add:4; + + /* Reads from A=0 obtain ISR or IRR? */ + uint8_t readsel_isr:1; + + /* Reads perform a polling read? */ + uint8_t poll:1; + + /* Automatically clear IRQs from the ISR during INTA? */ + uint8_t auto_eoi:1; + + /* Automatically rotate IRQ priorities during AEOI? */ + uint8_t rotate_on_auto_eoi:1; + + /* Exclude slave inputs when considering in-service IRQs? */ + uint8_t special_fully_nested_mode:1; + + /* Special mask mode excludes masked IRs from AEOI and priority checks. */ + uint8_t special_mask_mode:1; + + /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ + uint8_t is_master:1; + + /* Edge/trigger selection. */ + uint8_t elcr; + + /* Virtual INT output. */ + uint8_t int_output; +}; + +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); + + +/* + * IO-APIC + */ + +#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ + +struct hvm_hw_vioapic { + uint64_t base_address; + uint32_t ioregsel; + uint32_t id; + union vioapic_redir_entry + { + uint64_t bits; + struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; + uint8_t reserved[4]; + uint8_t dest_id; + } fields; + } redirtbl[VIOAPIC_NUM_PINS]; +}; + +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); + + +/* + * LAPIC + */ + +struct hvm_hw_lapic { + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; + uint64_t tdt_msr; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); + +struct hvm_hw_lapic_regs { + uint8_t data[1024]; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); + + +/* + * IRQs + */ + +struct hvm_hw_pci_irqs { + /* + * Virtual interrupt wires for a single PCI bus. + * Indexed by: device*4 + INTx#. + */ + union { + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ + uint64_t pad[2]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); + +struct hvm_hw_isa_irqs { + /* + * Virtual interrupt wires for ISA devices. + * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). + */ + union { + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ + uint64_t pad[1]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); + +struct hvm_hw_pci_link { + /* + * PCI-ISA interrupt router. + * Each PCI is 'wire-ORed' into one of four links using + * the traditional 'barber's pole' mapping ((device + INTx#) & 3). + * The router provides a programmable mapping from each link to a GSI. 
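+ *
+ * Worked example (editorial note, not upstream text; device and intx are
+ * the inputs): device 3, INTB# (INTx# index 1) is wire-ORed into link
+ * (3 + 1) & 3 == 0, so it raises whichever GSI route[0] currently holds:
+ *
+ *     unsigned int link = (device + intx) & 3;
+ *     unsigned int gsi  = links.route[link];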
+ */ + uint8_t route[4]; + uint8_t pad0[4]; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); + +/* + * PIT + */ + +struct hvm_hw_pit { + struct hvm_hw_pit_channel { + uint32_t count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + } channels[3]; /* 3 x 16 bytes */ + uint32_t speaker_data_on; + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); + + +/* + * RTC + */ + +#define RTC_CMOS_SIZE 14 +struct hvm_hw_rtc { + /* CMOS bytes */ + uint8_t cmos_data[RTC_CMOS_SIZE]; + /* Index register for 2-part operations */ + uint8_t cmos_index; + uint8_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); + + +/* + * HPET + */ + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct hvm_hw_hpet { + /* Memory-mapped, software visible registers */ + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t fsb; /* FSB route, not supported now */ + uint64_t res4; /* reserved */ + } timers[HPET_TIMER_NUM]; + uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ + + /* Hidden register state */ + uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ +}; + +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); + + +/* + * PM timer + */ + +struct hvm_hw_pmtimer { + uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ + uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ + uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ +}; + +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); + +/* + * MTRR MSRs + */ + +struct hvm_hw_mtrr { +#define MTRR_VCNT 8 +#define NUM_FIXED_MSR 11 + uint64_t msr_pat_cr; + /* mtrr physbase & physmask msr pair*/ + uint64_t msr_mtrr_var[MTRR_VCNT*2]; + uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; + uint64_t msr_mtrr_cap; + uint64_t msr_mtrr_def_type; +}; + +DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); + +/* + * The save area of XSAVE/XRSTOR. + */ + +struct hvm_hw_cpu_xsave { + uint64_t xfeature_mask; /* Ignored */ + uint64_t xcr0; /* Updated by XSETBV */ + uint64_t xcr0_accum; /* Updated by XSETBV */ + struct { + struct { char x[512]; } fpu_sse; + + struct { + uint64_t xstate_bv; /* Updated by XRSTOR */ + uint64_t reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[0]; } ymm; /* YMM */ + } save_area; +}; + +#define CPU_XSAVE_CODE 16 + +/* + * Viridian hypervisor context. 
+ */ + +struct hvm_viridian_domain_context { + uint64_t hypercall_gpa; + uint64_t guest_os_id; + uint64_t time_ref_count; + uint64_t reference_tsc; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); + +struct hvm_viridian_vcpu_context { + uint64_t apic_assist; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); + +struct hvm_vmce_vcpu { + uint64_t caps; + uint64_t mci_ctl2_bank0; + uint64_t mci_ctl2_bank1; +}; + +DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); + +struct hvm_tsc_adjust { + uint64_t tsc_adjust; +}; + +DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); + + +struct hvm_msr { + uint32_t count; + struct hvm_one_msr { + uint32_t index; + uint32_t _rsvd; + uint64_t val; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + } msr[]; +#elif defined(__GNUC__) + } msr[0]; +#else + } msr[1 /* variable size */]; +#endif +}; + +#define CPU_MSR_CODE 20 + +/* + * Largest type-code in use + */ +#define HVM_SAVE_CODE_MAX 20 + +#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/hvm/start_info.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/hvm/start_info.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86/hvm/start_info.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/hvm/start_info.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,98 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2016, Citrix Systems, Inc. + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ +#define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ + +/* + * Start of day structure passed to PVH guests and to HVM guests in %ebx. + * + * NOTE: nothing will be loaded at physical address 0, so a 0 value in any + * of the address fields should be treated as not present. + * + * 0 +----------------+ + * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE + * | | ("xEn3" with the 0x80 bit of the "E" set). + * 4 +----------------+ + * | version | Version of this structure. Current version is 0. New + * | | versions are guaranteed to be backwards-compatible. + * 8 +----------------+ + * | flags | SIF_xxx flags. + * 12 +----------------+ + * | nr_modules | Number of modules passed to the kernel. 
+ * 16 +----------------+ + * | modlist_paddr | Physical address of an array of modules + * | | (layout of the structure below). + * 24 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 32 +----------------+ + * | rsdp_paddr | Physical address of the RSDP ACPI data structure. + * 40 +----------------+ + * + * The layout of each entry in the module structure is the following: + * + * 0 +----------------+ + * | paddr | Physical address of the module. + * 8 +----------------+ + * | size | Size of the module in bytes. + * 16 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 24 +----------------+ + * | reserved | + * 32 +----------------+ + * + * The address and sizes are always a 64bit little endian unsigned integer. + * + * NB: Xen on x86 will always try to place all the data below the 4GiB + * boundary. + */ +#define XEN_HVM_START_MAGIC_VALUE 0x336ec578 + +/* + * C representation of the x86/HVM start info layout. + * + * The canonical definition of this layout is above, this is just a way to + * represent the layout described there using C types. + */ +struct hvm_start_info { + uint32_t magic; /* Contains the magic value 0x336ec578 */ + /* ("xEn3" with the 0x80 bit of the "E" set).*/ + uint32_t version; /* Version of this structure. */ + uint32_t flags; /* SIF_xxx flags. */ + uint32_t nr_modules; /* Number of modules passed to the kernel. */ + uint64_t modlist_paddr; /* Physical address of an array of */ + /* hvm_modlist_entry. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */ + /* structure. */ +}; + +struct hvm_modlist_entry { + uint64_t paddr; /* Physical address of the module. */ + uint64_t size; /* Size of the module in bytes. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t reserved; +}; + +#endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,303 @@ +/****************************************************************************** + * arch-x86/xen.h + * + * Guest OS interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef type * __guest_handle_ ## name
+#endif
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+#define XEN_HAVE_PV_GUEST_ENTRY 1
+
+#define XEN_HAVE_PV_UPCALL_MASK 1
+
+/*
+ * `incontents 200 segdesc Segment Descriptor Tables
+ */
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
+ * `
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB. The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
+ */
+#define FIRST_RESERVED_GDT_PAGE  14
+#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
+
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
+ * `
+ * ` @pa   The machine physical address of the descriptor to
+ * `       update. Must be either a descriptor page or writable.
+ * ` @desc The descriptor value to update, in the same format as a
+ * `       native descriptor table entry.
+ */
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+#define XEN_LEGACY_MAX_VCPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
+ * `
+ * Sets the stack segment and pointer for the current vcpu.
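+ *
+ * Illustrative sketch (editorial note, not upstream text; new_sp is a
+ * hypothetical stack top): a PV kernel switching kernel stacks would
+ * issue:
+ *
+ *     HYPERVISOR_stack_switch(FLAT_KERNEL_SS, (unsigned long)new_sp);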
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
+ * `
+ */
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
+#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t       vector;  /* exception vector                              */
+    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
+    uint16_t      cs;      /* code selector                                 */
+    unsigned long address; /* code offset                                   */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ *
+ * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
+ * for HVM and PVH guests, not all information in this structure is updated:
+ *
+ * - For HVM guests, the structures read include: fpu_ctxt (if
+ *   VGCF_I387_VALID is set), flags, user_regs, debugreg[*]
+ *
+ * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
+ *   set cr3. All other fields not used should be set to 0.
+ */
+struct vcpu_guest_context {
+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online                   5
+#define VGCF_online                    (1<<_VGCF_online)
+    unsigned long flags;                    /* VGCF_* flags                 */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
+    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
+    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1].
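+     * Editorial sketch (not upstream text; ctxt and pgd_mfn are
+     * hypothetical): the kernel pagetable goes in ctrlreg[3], packed in
+     * cr3 format via xen_pfn_to_cr3():
+     *     ctxt.ctrlreg[3] = xen_pfn_to_cr3(pgd_mfn);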
*/ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + }; +#else + unsigned long syscall_callback_eip; +#endif +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it has + * been set to invalid e.g. due to the p2m being too large for the 3-level + * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr + * is to be used. + */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + /* + * Following three fields are valid if p2m_cr3 contains a value different + * from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register state + * and holds the folded machine frame number (via xen_pfn_to_cr3) of a + * L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All entries + * in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 and + * a value with the least significant bit set indicates that a mapping + * update is in progress. This allows guest external software (e.g. in Dom0) + * to verify that read mappings are consistent and whether they have changed + * since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an atomic + * write only. + */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); + * + * ` unsigned long + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((unsigned long)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. 
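+ *
+ * Illustrative sketch (editorial note, not upstream text): from C with
+ * GNU inline assembly, the base leaf could be queried as:
+ *
+ *     uint32_t eax, ebx, ecx, edx;
+ *     asm volatile ( XEN_CPUID
+ *                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ *                    : "0" (0x40000000) );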
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
+#endif
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-mca.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-mca.h
--- xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-mca.h	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-mca.h	2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,440 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetch machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as a separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. Offlining physical page:
+ *    Xen frees and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; our interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previously-fetched telemetry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall another
+ * (most likely) correctable error happened. The fetched data
+ * does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+    uint16_t type; /* structure type */
+    uint16_t size; /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE     (1 << 2)
+#define MC_FLAG_POLLED          (1 << 3)
+#define MC_FLAG_RESET           (1 << 4)
+#define MC_FLAG_CMCI            (1 << 5)
+#define MC_FLAG_MCE             (1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* running domain at the time in error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint16_t mc_vcpuid;        /* virtual cpu scheduled for mc_domid */
+    uint32_t mc_socketid;      /* physical socket of the physical core */
+    uint16_t mc_coreid;        /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint32_t mc_apicid;
+    uint32_t mc_flags;
+    uint64_t mc_gstatus;       /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank;  /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    /*
+     * Currently Intel extended MSR (32/64) include all gp registers
+     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+     * useful at present. So expand this array to 16/32 to leave room.
+     */
+    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some cases */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* The interface below is used between XEN/DOM0 for passing XEN's recovery
+ * action information to DOM0.
+ * Usage scenario: After offlining a broken page, XEN might pass its page
+ * offline recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * known-broken page earlier during the next reboot.
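+ *
+ * Illustrative sketch (editorial note, not upstream text; mi points at
+ * fetched telemetry and remember_bad_mfn() is hypothetical): Dom0 could
+ * pick out the recovery record with the lookup macro defined further
+ * below:
+ *
+ *     struct mcinfo_recovery *rec;
+ *     x86_mcinfo_lookup(rec, mi, MC_TYPE_RECOVERY);
+ *     if ( rec && (rec->action_types & MC_ACTION_PAGE_OFFLINE) )
+ *         remember_bad_mfn(rec->action_info.page_retire.mfn);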
+*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery +{ + struct mcinfo_common common; + uint16_t mc_bank; /* bank nr */ + uint8_t action_flags; + uint8_t action_types; + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; + } action_info; +}; + + +#define MCINFO_HYPERCALLSIZE 1024 +#define MCINFO_MAXSIZE 768 + +#define MCINFO_FLAGS_UNCOMPLETE 0x1 +struct mc_info { + /* Number of mcinfo_* entries in mi_data */ + uint32_t mi_nentries; + uint32_t flags; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; +}; +typedef struct mc_info mc_info_t; +DEFINE_XEN_GUEST_HANDLE(mc_info_t); + +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ + +struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); + + +/* + * OS's should use these instead of writing their own lookup function + * each with its own bugs and drawbacks. + * We use macros instead of static inline functions to allow guests + * to include this header in assembly files (*.S). + */ +/* Prototype: + * uint32_t x86_mcinfo_nentries(struct mc_info *mi); + */ +#define x86_mcinfo_nentries(_mi) \ + (_mi)->mi_nentries +/* Prototype: + * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); + */ +#define x86_mcinfo_first(_mi) \ + ((struct mcinfo_common *)(_mi)->mi_data) +/* Prototype: + * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); + */ +#define x86_mcinfo_next(_mic) \ + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) + +/* Prototype: + * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); + */ +#define x86_mcinfo_lookup(_ret, _mi, _type) \ + do { \ + uint32_t found, i; \ + struct mcinfo_common *_mic; \ + \ + found = 0; \ + (_ret) = NULL; \ + if (_mi == NULL) break; \ + _mic = x86_mcinfo_first(_mi); \ + for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ + if (_mic->type == (_type)) { \ + found = 1; \ + break; \ + } \ + _mic = x86_mcinfo_next(_mic); \ + } \ + (_ret) = found ? 
_mic : NULL; \ + } while (0) + + +/* Usecase 1 + * Register machine check trap callback handler + * (already done via "set_trap_table" hypercall) + */ + +/* Usecase 2 + * Dom0 registers machine check event callback handler + * done by EVTCHNOP_bind_virq + */ + +/* Usecase 3 + * Fetch machine check data from hypervisor. + * Note, this hypercall is special, because both Dom0 and DomU must use this. + */ +#define XEN_MC_fetch 1 +struct xen_mc_fetch { + /* IN/OUT variables. */ + uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, + XEN_MC_ACK if ack'ing an earlier fetch */ + /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, + XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; + uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ + + /* OUT variables. */ + XEN_GUEST_HANDLE(mc_info_t) data; +}; +typedef struct xen_mc_fetch xen_mc_fetch_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); + + +/* Usecase 4 + * This tells the hypervisor to notify a DomU about the machine check error + */ +#define XEN_MC_notifydomain 2 +struct xen_mc_notifydomain { + /* IN variables. */ + uint16_t mc_domid; /* The unprivileged domain to notify. */ + uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. + * Usually echo'd value from the fetch hypercall. */ + + /* IN/OUT variables. */ + uint32_t flags; + +/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ +/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ +}; +typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); + +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t _pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; + +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. 
count-1 in array are valid */ + uint32_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 + +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#define XEN_MC_inject_v2 6 +#define XEN_MC_INJECT_TYPE_MASK 0x7 +#define XEN_MC_INJECT_TYPE_MCE 0x0 +#define XEN_MC_INJECT_TYPE_CMCI 0x1 + +#define XEN_MC_INJECT_CPU_BROADCAST 0x8 + +struct xen_mc_inject_v2 { + uint32_t flags; + struct xenctl_bitmap cpumap; +}; +#endif + +struct xen_mc { + uint32_t cmd; + uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ + union { + struct xen_mc_fetch mc_fetch; + struct xen_mc_notifydomain mc_notifydomain; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; +#if defined(__XEN__) || defined(__XEN_TOOLS__) + struct xen_mc_inject_v2 mc_inject_v2; +#endif + } u; +}; +typedef struct xen_mc xen_mc_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_t); + +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-x86_32.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-x86_32.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-x86_32.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,173 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
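+ *
+ * Illustrative sketch (editorial note, not upstream text): a ring-1 PV
+ * kernel keeping this layout can load the predefined selectors directly,
+ * e.g. from C:
+ *
+ *     unsigned int sel = FLAT_KERNEL_DS;
+ *     asm volatile ( "mov %0, %%ds" :: "r" (sel) );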
+ */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#ifdef CONFIG_PARAVIRT +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +/* Non-PAE bounds are obsolete. */ +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef ___DEFINE_XEN_GUEST_HANDLE +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. 
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-x86_64.h xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-x86_64.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86/xen-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86/xen-x86_64.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,204 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */
+
+#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000 /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS FLAT_USER_SS64
+
+#ifdef CONFIG_PARAVIRT
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ * @which == SEGBASE_* ; @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS 0
+#define SEGBASE_GS_USER 1
+#define SEGBASE_GS_KERNEL 2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ * RING0 -> RING3 kernel mode.
+ * RING1 -> RING3 kernel mode.
+ * RING2 -> RING3 kernel mode.
+ * RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ * orb $3,1*8(%rsp)
+ * iretq
+ * If flags contains VGCF_in_syscall:
+ * Restore RAX, RIP, RFLAGS, RSP.
+ * Discard R11, RCX, CS, SS.
+ * Otherwise:
+ * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+ /* Top of stack (%rsp at point of hypercall). */
+ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax).
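+ * (With gcc, by contrast, regs.rax, regs.eax and regs._eax all alias the
+ * same storage: eax is a 64-bit alias of rax, while _eax is its low 32 bits,
+ * so writing 0x1122334455667788 to rax leaves 0x55667788 in _eax on
+ * little-endian x86.)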
*/ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef unsigned long xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86_32.h xen-4.9.2/extras/mini-os/include/xen/arch-x86_32.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86_32.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" diff -Nru xen-4.9.0/extras/mini-os/include/xen/arch-x86_64.h xen-4.9.2/extras/mini-os/include/xen/arch-x86_64.h --- xen-4.9.0/extras/mini-os/include/xen/arch-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/arch-x86_64.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,43 @@ +/****************************************************************************** + * arch-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_callbacks(unsigned long event_selector, + * ` unsigned long event_address, + * ` unsigned long failsafe_selector, + * ` unsigned long failsafe_address); + * ` + * Register for callbacks on events. When an event (from an event + * channel) occurs, event_address is used as the value of eip. + * + * A similar callback occurs if the segment selectors are invalid. + * failsafe_address is used as the value of eip. + * + * On x86_64, event_selector and failsafe_selector are ignored (???). + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/callback.h xen-4.9.2/extras/mini-os/include/xen/callback.h --- xen-4.9.0/extras/mini-os/include/xen/callback.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/callback.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,121 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). 
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* x86: Callback for event delivery. */
+#define CALLBACKTYPE_event 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe 1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall 2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ * feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated 3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi 4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ * [nb. also 64-bit guest applications on Intel CPUs
+ * ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter 5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32 7
+
+/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register 0
+struct callback_register {
+ uint16_t type;
+ uint16_t flags;
+ xen_callback_t address;
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister 1
+struct callback_unregister {
+ uint16_t type;
+ uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/COPYING xen-4.9.2/extras/mini-os/include/xen/COPYING --- xen-4.9.0/extras/mini-os/include/xen/COPYING 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/COPYING 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,38 @@ +XEN NOTICE +==========
+
+This copyright applies to all files within this subdirectory and its
+subdirectories:
+ include/public/*.h
+ include/public/hvm/*.h
+ include/public/io/*.h
+
+The intention is that these files can be freely copied into the source
+tree of an operating system when porting that OS to run on Xen. Doing
+so does *not* cause the OS to become subject to the terms of the GPL.
+
+All other files in the Xen source distribution are covered by version
+2 of the GNU General Public License except where explicitly stated
+otherwise within individual source files.
+ + -- Keir Fraser (on behalf of the Xen team) + +===================================================================== + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff -Nru xen-4.9.0/extras/mini-os/include/xen/dom0_ops.h xen-4.9.2/extras/mini-os/include/xen/dom0_ops.h --- xen-4.9.0/extras/mini-os/include/xen/dom0_ops.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/dom0_ops.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,120 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOM0_OPS_H__ +#define __XEN_PUBLIC_DOM0_OPS_H__ + +#include "xen.h" +#include "platform.h" + +#if __XEN_INTERFACE_VERSION__ >= 0x00030204 +#error "dom0_ops.h is a compatibility interface only" +#endif + +#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION + +#define DOM0_SETTIME XENPF_settime +#define dom0_settime xenpf_settime +#define dom0_settime_t xenpf_settime_t + +#define DOM0_ADD_MEMTYPE XENPF_add_memtype +#define dom0_add_memtype xenpf_add_memtype +#define dom0_add_memtype_t xenpf_add_memtype_t + +#define DOM0_DEL_MEMTYPE XENPF_del_memtype +#define dom0_del_memtype xenpf_del_memtype +#define dom0_del_memtype_t xenpf_del_memtype_t + +#define DOM0_READ_MEMTYPE XENPF_read_memtype +#define dom0_read_memtype xenpf_read_memtype +#define dom0_read_memtype_t xenpf_read_memtype_t + +#define DOM0_MICROCODE XENPF_microcode_update +#define dom0_microcode xenpf_microcode_update +#define dom0_microcode_t xenpf_microcode_update_t + +#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk +#define dom0_platform_quirk xenpf_platform_quirk +#define dom0_platform_quirk_t xenpf_platform_quirk_t + +typedef uint64_t cpumap_t; + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_MSR 15 +struct dom0_msr { + /* IN variables. */ + uint32_t write; + cpumap_t cpu_mask; + uint32_t msr; + uint32_t in1; + uint32_t in2; + /* OUT variables. */ + uint32_t out1; + uint32_t out2; +}; +typedef struct dom0_msr dom0_msr_t; +DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_PHYSICAL_MEMORY_MAP 40 +struct dom0_memory_map_entry { + uint64_t start, end; + uint32_t flags; /* reserved */ + uint8_t is_ram; +}; +typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; +DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); + +struct dom0_op { + uint32_t cmd; + uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ + union { + struct dom0_msr msr; + struct dom0_settime settime; + struct dom0_add_memtype add_memtype; + struct dom0_del_memtype del_memtype; + struct dom0_read_memtype read_memtype; + struct dom0_microcode microcode; + struct dom0_platform_quirk platform_quirk; + struct dom0_memory_map_entry physical_memory_map; + uint8_t pad[128]; + } u; +}; +typedef struct dom0_op dom0_op_t; +DEFINE_XEN_GUEST_HANDLE(dom0_op_t); + +#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/domctl.h xen-4.9.2/extras/mini-os/include/xen/domctl.h --- xen-4.9.0/extras/mini-os/include/xen/domctl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/domctl.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1154 @@ +/****************************************************************************** + * domctl.h + * + * Domain management operations. For use by node control stack. 
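+ * (i.e., the toolstack in a privileged domain, on Linux typically issued
+ * through the privcmd driver; ordinary guests cannot use these operations,
+ * as the __XEN_TOOLS__ guard below reflects.)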
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "grant_table.h"
+#include "hvm/save.h"
+#include "memory.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+/* XEN_DOMCTL_createdomain */
+struct xen_domctl_createdomain {
+ /* IN parameters */
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+ /* Is this an HVM guest (as opposed to a PVH or PV guest)? */
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+ /* Use hardware-assisted paging if available? */
+#define _XEN_DOMCTL_CDF_hap 1
+#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap)
+ /* Should domain memory integrity be verified by tboot during Sx? */
+#define _XEN_DOMCTL_CDF_s3_integrity 2
+#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
+ /* Disable out-of-sync shadow page tables? */
+#define _XEN_DOMCTL_CDF_oos_off 3
+#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off)
+ /* Is this a PVH guest (as opposed to an HVM or PV guest)? */
+#define _XEN_DOMCTL_CDF_pvh_guest 4
+#define XEN_DOMCTL_CDF_pvh_guest (1U<<_XEN_DOMCTL_CDF_pvh_guest)
+ uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+
+#if defined(__arm__) || defined(__aarch64__)
+#define XEN_DOMCTL_CONFIG_GIC_DEFAULT 0
+#define XEN_DOMCTL_CONFIG_GIC_V2 1
+#define XEN_DOMCTL_CONFIG_GIC_V3 2
+/* XEN_DOMCTL_configure_domain */
+struct xen_domctl_arm_configuredomain {
+ /* IN/OUT parameters */
+ uint8_t gic_version;
+};
+typedef struct xen_domctl_arm_configuredomain xen_domctl_arm_configuredomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_arm_configuredomain_t);
+#endif
+
+/* XEN_DOMCTL_getdomaininfo */
+struct xen_domctl_getdomaininfo {
+ /* OUT variables. */
+ domid_t domain; /* Also echoed in domctl.domain */
+ /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying 0
+#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying)
+ /* Domain is an HVM guest (as opposed to a PV guest).
*/ +#define _XEN_DOMINF_hvm_guest 1 +#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) + /* The guest OS has shut down. */ +#define _XEN_DOMINF_shutdown 2 +#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) + /* Currently paused by control software. */ +#define _XEN_DOMINF_paused 3 +#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) + /* Currently blocked pending an event. */ +#define _XEN_DOMINF_blocked 4 +#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) + /* Domain is currently running. */ +#define _XEN_DOMINF_running 5 +#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) + /* Being debugged. */ +#define _XEN_DOMINF_debugged 6 +#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) +/* domain is PVH */ +#define _XEN_DOMINF_pvh_guest 7 +#define XEN_DOMINF_pvh_guest (1U<<_XEN_DOMINF_pvh_guest) + /* XEN_DOMINF_shutdown guest-supplied code. */ +#define XEN_DOMINF_shutdownmask 255 +#define XEN_DOMINF_shutdownshift 16 + uint32_t flags; /* XEN_DOMINF_* */ + uint64_aligned_t tot_pages; + uint64_aligned_t max_pages; + uint64_aligned_t outstanding_pages; + uint64_aligned_t shr_pages; + uint64_aligned_t paged_pages; + uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ + uint64_aligned_t cpu_time; + uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ +#define XEN_INVALID_MAX_VCPU_ID (~0U) /* Domain has no vcpus? */ + uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ + uint32_t ssidref; + xen_domain_handle_t handle; + uint32_t cpupool; +}; +typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); + + +/* XEN_DOMCTL_getmemlist */ +struct xen_domctl_getmemlist { + /* IN variables. */ + /* Max entries to write to output buffer. */ + uint64_aligned_t max_pfns; + /* Start index in guest's page list. */ + uint64_aligned_t start_pfn; + XEN_GUEST_HANDLE_64(uint64) buffer; + /* OUT variables. */ + uint64_aligned_t num_pfns; +}; +typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); + + +/* XEN_DOMCTL_getpageframeinfo */ + +#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 +#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) +#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) +#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) +#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) +#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) +#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) +#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) +#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ +#define XEN_DOMCTL_PFINFO_BROKEN (0xdU<<28) /* broken page */ +#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) + +struct xen_domctl_getpageframeinfo { + /* IN variables. */ + uint64_aligned_t gmfn; /* GMFN to query */ + /* OUT variables. */ + /* Is the page PINNED to a type? */ + uint32_t type; /* see above type defs */ +}; +typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); + + +/* XEN_DOMCTL_getpageframeinfo2 */ +struct xen_domctl_getpageframeinfo2 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(uint32) array; +}; +typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); + +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. 
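+ * On input the array holds the GFNs to query; on output each entry has the
+ * matching XEN_DOMCTL_PFINFO_* type flags ORed into its top bits (this is
+ * what the libxc save/restore code uses to classify pages).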
*/ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + + +/* + * Control shadow pagetables operation + */ +/* XEN_DOMCTL_shadow_op */ + +/* Disable shadow mode. */ +#define XEN_DOMCTL_SHADOW_OP_OFF 0 + +/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE 32 + +/* Log-dirty bitmap operations. */ + /* Return the bitmap and clean internal copy for next round. */ +#define XEN_DOMCTL_SHADOW_OP_CLEAN 11 + /* Return the bitmap but do not modify internal copy. */ +#define XEN_DOMCTL_SHADOW_OP_PEEK 12 + +/* Memory allocation accessors. */ +#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 +#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 + +/* Legacy enable operations. */ + /* Equiv. to ENABLE with no mode flags. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 + /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 + /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 + +/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ + /* + * Shadow pagetables are refcounted: guest does not use explicit mmu + * operations nor write-protect its pagetables. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) + /* + * Log pages in a bitmap as they are dirtied. + * Used for live relocation to determine which pages must be re-sent. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) + /* + * Automatically translate GPFNs into MFNs. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) + /* + * Xen does not steal virtual address space from the guest. + * Requires HVM support. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) + +struct xen_domctl_shadow_op_stats { + uint32_t fault_count; + uint32_t dirty_count; +}; +typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); + +struct xen_domctl_shadow_op { + /* IN variables. */ + uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ + + /* OP_ENABLE */ + uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + + /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ + uint32_t mb; /* Shadow memory allocation in MB */ + + /* OP_PEEK / OP_CLEAN */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; + uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ + struct xen_domctl_shadow_op_stats stats; +}; +typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); + + +/* XEN_DOMCTL_max_mem */ +struct xen_domctl_max_mem { + /* IN variables. */ + uint64_aligned_t max_memkb; +}; +typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); + + +/* XEN_DOMCTL_setvcpucontext */ +/* XEN_DOMCTL_getvcpucontext */ +struct xen_domctl_vcpucontext { + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); + + +/* XEN_DOMCTL_getvcpuinfo */ +struct xen_domctl_getvcpuinfo { + /* IN variables. */ + uint32_t vcpu; + /* OUT variables. */ + uint8_t online; /* currently online (not hotplugged)? */ + uint8_t blocked; /* blocked waiting for an event? */ + uint8_t running; /* currently scheduled on its CPU? 
*/
+ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */
+ uint32_t cpu; /* current mapping */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set the NUMA node(s) with which the guest has affinity. */
+/* XEN_DOMCTL_setnodeaffinity */
+/* XEN_DOMCTL_getnodeaffinity */
+struct xen_domctl_nodeaffinity {
+ struct xenctl_bitmap nodemap; /* IN */
+};
+typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on. */
+/* XEN_DOMCTL_setvcpuaffinity */
+/* XEN_DOMCTL_getvcpuaffinity */
+struct xen_domctl_vcpuaffinity {
+ /* IN variables. */
+ uint32_t vcpu;
+ /* Set/get the hard affinity for vcpu */
+#define _XEN_VCPUAFFINITY_HARD 0
+#define XEN_VCPUAFFINITY_HARD (1U<<_XEN_VCPUAFFINITY_HARD)
+ /* Set/get the soft affinity for vcpu */
+#define _XEN_VCPUAFFINITY_SOFT 1
+#define XEN_VCPUAFFINITY_SOFT (1U<<_XEN_VCPUAFFINITY_SOFT)
+ uint32_t flags;
+ /*
+ * IN/OUT variables.
+ *
+ * Both are IN/OUT for XEN_DOMCTL_setvcpuaffinity, in which case they
+ * contain the effective hard and/or soft affinity. That is, upon successful
+ * return, cpumap_soft contains the intersection of the soft affinity,
+ * hard affinity and the cpupool's online CPUs for the domain (if
+ * XEN_VCPUAFFINITY_SOFT was set in flags). cpumap_hard contains the
+ * intersection between hard affinity and the cpupool's online CPUs (if
+ * XEN_VCPUAFFINITY_HARD was set in flags).
+ *
+ * Both are OUT-only for XEN_DOMCTL_getvcpuaffinity, in which case they
+ * contain the plain hard and/or soft affinity masks that were set during
+ * previous successful calls to XEN_DOMCTL_setvcpuaffinity (or the
+ * default values), without intersecting or altering them in any way.
+ */
+ struct xenctl_bitmap cpumap_hard;
+ struct xenctl_bitmap cpumap_soft;
+};
+typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
+
+
+/* XEN_DOMCTL_max_vcpus */
+struct xen_domctl_max_vcpus {
+ uint32_t max; /* maximum number of vcpus */
+};
+typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
+
+
+/* XEN_DOMCTL_scheduler_op */
+/* Scheduler types. */
+#define XEN_SCHEDULER_SEDF 4
+#define XEN_SCHEDULER_CREDIT 5
+#define XEN_SCHEDULER_CREDIT2 6
+#define XEN_SCHEDULER_ARINC653 7
+#define XEN_SCHEDULER_RTDS 8
+
+/* Set or get info?
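+ * e.g., a sketch of doubling a domain's credit-scheduler weight (the
+ * default weight is 256):
+ *     op.sched_id = XEN_SCHEDULER_CREDIT;
+ *     op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
+ *     op.u.credit.weight = 512;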
*/
+#define XEN_DOMCTL_SCHEDOP_putinfo 0
+#define XEN_DOMCTL_SCHEDOP_getinfo 1
+struct xen_domctl_scheduler_op {
+ uint32_t sched_id; /* XEN_SCHEDULER_* */
+ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */
+ union {
+ struct xen_domctl_sched_sedf {
+ uint64_aligned_t period;
+ uint64_aligned_t slice;
+ uint64_aligned_t latency;
+ uint32_t extratime;
+ uint32_t weight;
+ } sedf;
+ struct xen_domctl_sched_credit {
+ uint16_t weight;
+ uint16_t cap;
+ } credit;
+ struct xen_domctl_sched_credit2 {
+ uint16_t weight;
+ } credit2;
+ struct xen_domctl_sched_rtds {
+ uint32_t period;
+ uint32_t budget;
+ } rtds;
+ } u;
+};
+typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
+
+
+/* XEN_DOMCTL_setdomainhandle */
+struct xen_domctl_setdomainhandle {
+ xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
+
+
+/* XEN_DOMCTL_setdebugging */
+struct xen_domctl_setdebugging {
+ uint8_t enable;
+};
+typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
+
+
+/* XEN_DOMCTL_irq_permission */
+struct xen_domctl_irq_permission {
+ uint8_t pirq;
+ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
+};
+typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
+
+
+/* XEN_DOMCTL_iomem_permission */
+struct xen_domctl_iomem_permission {
+ uint64_aligned_t first_mfn; /* first page (physical page number) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
+
+
+/* XEN_DOMCTL_ioport_permission */
+struct xen_domctl_ioport_permission {
+ uint32_t first_port; /* first port in range */
+ uint32_t nr_ports; /* size of port range */
+ uint8_t allow_access; /* allow or deny access to range?
*/
+};
+typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
+
+/* XEN_DOMCTL_hypercall_init */
+struct xen_domctl_hypercall_init {
+ uint64_aligned_t gmfn; /* GMFN to be initialised */
+};
+typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
+
+/* XEN_DOMCTL_settimeoffset */
+struct xen_domctl_settimeoffset {
+ int32_t time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+/* XEN_DOMCTL_gethvmcontext */
+/* XEN_DOMCTL_sethvmcontext */
+typedef struct xen_domctl_hvmcontext {
+ uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
+ * gethvmcontext with NULL
+ * buffer to get size req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+
+/* XEN_DOMCTL_set_address_size */
+/* XEN_DOMCTL_get_address_size */
+typedef struct xen_domctl_address_size {
+ uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+/* XEN_DOMCTL_sendtrigger */
+#define XEN_DOMCTL_SENDTRIGGER_NMI 0
+#define XEN_DOMCTL_SENDTRIGGER_RESET 1
+#define XEN_DOMCTL_SENDTRIGGER_INIT 2
+#define XEN_DOMCTL_SENDTRIGGER_POWER 3
+#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4
+struct xen_domctl_sendtrigger {
+ uint32_t trigger; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
+/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+/* XEN_DOMCTL_assign_device */
+/* XEN_DOMCTL_test_assign_device */
+/* XEN_DOMCTL_deassign_device */
+struct xen_domctl_assign_device {
+ uint32_t machine_sbdf; /* machine PCI ID of assigned device */
+};
+typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
+
+/* Retrieve sibling device information for machine_sbdf */
+/* XEN_DOMCTL_get_device_group */
+struct xen_domctl_get_device_group {
+ uint32_t machine_sbdf; /* IN */
+ uint32_t max_sdevs; /* IN */
+ uint32_t num_sdevs; /* OUT */
+ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
+
+/* Pass-through interrupts: bind real irq -> hvm devfn. */
+/* XEN_DOMCTL_bind_pt_irq */
+/* XEN_DOMCTL_unbind_pt_irq */
+typedef enum pt_irq_type_e {
+ PT_IRQ_TYPE_PCI,
+ PT_IRQ_TYPE_ISA,
+ PT_IRQ_TYPE_MSI,
+ PT_IRQ_TYPE_MSI_TRANSLATE,
+} pt_irq_type_t;
+struct xen_domctl_bind_pt_irq {
+ uint32_t machine_irq;
+ pt_irq_type_t irq_type;
+ uint32_t hvm_domid;
+
+ union {
+ struct {
+ uint8_t isa_irq;
+ } isa;
+ struct {
+ uint8_t bus;
+ uint8_t device;
+ uint8_t intx;
+ } pci;
+ struct {
+ uint8_t gvec;
+ uint32_t gflags;
+ uint64_aligned_t gtable;
+ } msi;
+ } u;
+};
+typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
+
+
+/* Bind machine I/O address range -> HVM address range.
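+ * Typically used when passing a PCI device's MMIO BAR through to an HVM
+ * guest: the toolstack maps nr_mfns machine frames starting at first_mfn
+ * into the guest physmap at first_gfn with add_mapping == DPCI_ADD_MAPPING.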
*/ +/* XEN_DOMCTL_memory_mapping */ +#define DPCI_ADD_MAPPING 1 +#define DPCI_REMOVE_MAPPING 0 +struct xen_domctl_memory_mapping { + uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ + uint64_aligned_t first_mfn; /* first page (machine page) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint32_t add_mapping; /* add or remove mapping */ + uint32_t padding; /* padding for 64-bit aligned structure */ +}; +typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); + + +/* Bind machine I/O port range -> HVM I/O port range. */ +/* XEN_DOMCTL_ioport_mapping */ +struct xen_domctl_ioport_mapping { + uint32_t first_gport; /* first guest IO port*/ + uint32_t first_mport; /* first machine IO port */ + uint32_t nr_ports; /* size of port range */ + uint32_t add_mapping; /* add or remove mapping */ +}; +typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); + + +/* + * Pin caching type of RAM space for x86 HVM domU. + */ +/* XEN_DOMCTL_pin_mem_cacheattr */ +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 +#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 +#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 +#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 +#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 +#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +#define XEN_DOMCTL_DELETE_MEM_CACHEATTR (~(uint32_t)0) +struct xen_domctl_pin_mem_cacheattr { + uint64_aligned_t start, end; + uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ +}; +typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); + + +/* XEN_DOMCTL_set_ext_vcpucontext */ +/* XEN_DOMCTL_get_ext_vcpucontext */ +struct xen_domctl_ext_vcpucontext { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: Size of struct (IN) + * GET: Size of struct (OUT, up to 128 bytes) + */ + uint32_t size; +#if defined(__i386__) || defined(__x86_64__) + /* SYSCALL from 32-bit mode and SYSENTER callback information. */ + /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ + uint64_aligned_t syscall32_callback_eip; + uint64_aligned_t sysenter_callback_eip; + uint16_t syscall32_callback_cs; + uint16_t sysenter_callback_cs; + uint8_t syscall32_disables_events; + uint8_t sysenter_disables_events; +#if defined(__GNUC__) + union { + uint64_aligned_t mcg_cap; + struct hvm_vmce_vcpu vmce; + }; +#else + struct hvm_vmce_vcpu vmce; +#endif +#endif +}; +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); + +/* + * Set the target domain for a domain + */ +/* XEN_DOMCTL_set_target */ +struct xen_domctl_set_target { + domid_t target; +}; +typedef struct xen_domctl_set_target xen_domctl_set_target_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); + +#if defined(__i386__) || defined(__x86_64__) +# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF +/* XEN_DOMCTL_set_cpuid */ +struct xen_domctl_cpuid { + uint32_t input[2]; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); +#endif + +/* + * Arranges that if the domain suspends (specifically, if it shuts + * down with code SHUTDOWN_suspend), this event channel will be + * notified. 
+ *
+ * This is _instead of_ the usual notification to the global
+ * VIRQ_DOM_EXC. (In most systems that virq is owned by xenstored.)
+ *
+ * Only one subscription per domain is possible. Last subscriber
+ * wins; others are silently displaced.
+ *
+ * NB that contrary to the rather general name, it only applies to
+ * domain shutdown with code suspend. Shutdown for other reasons
+ * (including crash), and domain death, are notified to VIRQ_DOM_EXC
+ * regardless.
+ */
+/* XEN_DOMCTL_subscribe */
+struct xen_domctl_subscribe {
+ uint32_t port; /* IN */
+};
+typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
+
+/*
+ * Define the maximum machine address size which should be allocated
+ * to a guest.
+ */
+/* XEN_DOMCTL_set_machine_address_size */
+/* XEN_DOMCTL_get_machine_address_size */
+
+/*
+ * Do not inject spurious page faults into this domain.
+ */
+/* XEN_DOMCTL_suppress_spurious_page_faults */
+
+/* XEN_DOMCTL_debug_op */
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1
+struct xen_domctl_debug_op {
+ uint32_t op; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
+
+/*
+ * Request a particular record from the HVM context
+ */
+/* XEN_DOMCTL_gethvmcontext_partial */
+typedef struct xen_domctl_hvmcontext_partial {
+ uint32_t type; /* IN: Type of record required */
+ uint32_t instance; /* IN: Instance of that type */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+/* XEN_DOMCTL_disable_migrate */
+typedef struct xen_domctl_disable_migrate {
+ uint32_t disable; /* IN: 1: disable migration and restore */
+} xen_domctl_disable_migrate_t;
+
+
+/* XEN_DOMCTL_gettscinfo */
+/* XEN_DOMCTL_settscinfo */
+struct xen_guest_tsc_info {
+ uint32_t tsc_mode;
+ uint32_t gtsc_khz;
+ uint32_t incarnation;
+ uint32_t pad;
+ uint64_aligned_t elapsed_nsec;
+};
+typedef struct xen_guest_tsc_info xen_guest_tsc_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t);
+typedef struct xen_domctl_tsc_info {
+ XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */
+ xen_guest_tsc_info_t info; /* IN */
+} xen_domctl_tsc_info_t;
+
+/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */
+struct xen_domctl_gdbsx_memio {
+ /* IN */
+ uint64_aligned_t pgd3val; /* optional: init_mm.pgd[3] value */
+ uint64_aligned_t gva; /* guest virtual address */
+ uint64_aligned_t uva; /* user buffer virtual address */
+ uint32_t len; /* number of bytes to read/write */
+ uint8_t gwr; /* 0 = read from guest. 1 = write to guest */
+ /* OUT */
+ uint32_t remain; /* bytes remaining to be copied */
+};
+
+/* XEN_DOMCTL_gdbsx_pausevcpu */
+/* XEN_DOMCTL_gdbsx_unpausevcpu */
+struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */
+ uint32_t vcpu; /* which vcpu */
+};
+
+/* XEN_DOMCTL_gdbsx_domstatus */
+struct xen_domctl_gdbsx_domstatus {
+ /* OUT */
+ uint8_t paused; /* is the domain paused */
+ uint32_t vcpu_id; /* any vcpu in an event? */
+ uint32_t vcpu_ev; /* if yes, what event? */
+};
+
+/*
+ * Memory event operations
+ */
+
+/* XEN_DOMCTL_mem_event_op */
+
+/*
+ * Domain memory paging
+ * Page memory in and out.
+ * Domctl interface to set up and tear down the
+ * pager<->hypervisor interface. Use XENMEM_paging_op*
+ * to perform per-page operations.
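+ * (e.g., the xenpaging tool acts as such a pager, nominating and evicting
+ * guest frames and paging them back in on demand.)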
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several
+ * non-standard error codes to indicate why paging could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EMLINK - guest has iommu passthrough enabled
+ * EXDEV - guest has PoD enabled
+ * EBUSY - guest has or had paging enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1
+
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1
+
+/*
+ * Access permissions.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * There are HVM hypercalls to set the per-page access permissions of every
+ * page in a domain. When one of these permissions--independent, read,
+ * write, and execute--is violated, the VCPU is paused and a memory event
+ * is sent with what happened. (See public/mem_event.h.)
+ *
+ * The memory event handler can then resume the VCPU and redo the access
+ * with a XENMEM_access_op_resume hypercall.
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several
+ * non-standard error codes to indicate why access could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EBUSY - guest has or had access enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2
+
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION 2
+
+/*
+ * Sharing ENOMEM helper.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * If set up, this ring is used to communicate failed allocations
+ * in the unshare path. XENMEM_sharing_op_resume is used to wake up
+ * vcpus that could not unshare.
+ *
+ * Note that sharing can be turned on (as per the domctl below)
+ * *without* this ring being set up.
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3
+
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1
+
+/* Use for teardown/setup of helper<->hypervisor interface for paging,
+ * access and sharing. */
+struct xen_domctl_mem_event_op {
+ uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
+ uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */
+
+ uint32_t port; /* OUT: event channel for ring */
+};
+typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
+
+/*
+ * Memory sharing operations
+ */
+/* XEN_DOMCTL_mem_sharing_op.
+ * The CONTROL sub-domctl is used for bringup/teardown.
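+ * Individual pages are then shared and unshared via the XENMEM_sharing_op_*
+ * hypercalls (see public/memory.h); this domctl only switches the feature
+ * on or off for a domain.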
*/
+#define XEN_DOMCTL_MEM_SHARING_CONTROL 0
+
+struct xen_domctl_mem_sharing_op {
+ uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */
+
+ union {
+ uint8_t enable; /* CONTROL */
+ } u;
+};
+typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
+
+struct xen_domctl_audit_p2m {
+ /* OUT error counts */
+ uint64_t orphans;
+ uint64_t m2p_bad;
+ uint64_t p2m_bad;
+};
+typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t);
+
+struct xen_domctl_set_virq_handler {
+ uint32_t virq; /* IN */
+};
+typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+/* XEN_DOMCTL_setvcpuextstate */
+/* XEN_DOMCTL_getvcpuextstate */
+struct xen_domctl_vcpuextstate {
+ /* IN: VCPU that this call applies to. */
+ uint32_t vcpu;
+ /*
+ * SET: Ignored.
+ * GET: xfeature support mask of struct (IN/OUT)
+ * The xfeature mask serves as identification of the saving format,
+ * so that compatible CPUs can check the format to decide
+ * whether they can restore it.
+ */
+ uint64_aligned_t xfeature_mask;
+ /*
+ * SET: Size of struct (IN)
+ * GET: Size of struct (IN/OUT)
+ */
+ uint64_aligned_t size;
+ XEN_GUEST_HANDLE_64(uint64) buffer;
+};
+typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
+#endif
+
+/* XEN_DOMCTL_set_access_required: sets whether a memory event listener
+ * must be present to handle page access events: if false, the page
+ * access will revert to full permissions if no one is listening.
+ */
+struct xen_domctl_set_access_required {
+ uint8_t access_required;
+};
+typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
+
+struct xen_domctl_set_broken_page_p2m {
+ uint64_aligned_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
+/*
+ * XEN_DOMCTL_set_max_evtchn: sets the maximum event channel port
+ * number the guest may use. Use this to limit the amount of resources
+ * (global mapping space, xenheap) a guest may use for event channels.
+ */
+struct xen_domctl_set_max_evtchn {
+ uint32_t max_port;
+};
+typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
+
+/*
+ * ARM: Clean and invalidate caches associated with given region of
+ * guest memory.
+ */
+struct xen_domctl_cacheflush {
+ /* IN: page range to flush. */
+ xen_pfn_t start_pfn, nr_pfns;
+};
+typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+struct xen_domctl_vcpu_msr {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_aligned_t value;
+};
+typedef struct xen_domctl_vcpu_msr xen_domctl_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msr_t);
+
+/*
+ * XEN_DOMCTL_set_vcpu_msrs / XEN_DOMCTL_get_vcpu_msrs.
+ *
+ * Input:
+ * - A NULL 'msrs' guest handle is a request for the maximum 'msr_count'.
+ * - Otherwise, 'msr_count' is the number of entries in 'msrs'.
+ *
+ * Output for get:
+ * - If 'msr_count' is less than the number Xen needs to write, -ENOBUFS shall
+ * be returned and 'msr_count' updated to reflect the intended number.
+ * - On success, 'msr_count' shall indicate the number of MSRs written, which
+ * may be less than the maximum if some are not currently used by the vcpu.
+ *
+ * Output for set:
+ * - If Xen encounters an error with a specific MSR, -EINVAL shall be returned
+ * and 'msr_count' shall be set to the offending index, to aid debugging.
+ */
+struct xen_domctl_vcpu_msrs {
+ uint32_t vcpu; /* IN */
+ uint32_t msr_count; /* IN/OUT */
+ XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+#endif
+
+/*
+ * Used in XEN_DOMCTL_setvnumainfo to set
+ * vNUMA domain topology.
+ */
+struct xen_domctl_vnuma {
+ uint32_t nr_vnodes;
+ uint32_t nr_vmemranges;
+ uint32_t nr_vcpus;
+ uint32_t pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+ /*
+ * vnode to physical NUMA node mask.
+ * This is kept on a per-domain basis for
+ * interested consumers, such as NUMA-aware ballooning.
+ */
+ XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+ /*
+ * memory ranges for each vNUMA node
+ */
+ XEN_GUEST_HANDLE_64(xen_vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
+struct xen_domctl_psr_cmt_op {
+#define XEN_DOMCTL_PSR_CMT_OP_DETACH 0
+#define XEN_DOMCTL_PSR_CMT_OP_ATTACH 1
+#define XEN_DOMCTL_PSR_CMT_OP_QUERY_RMID 2
+ uint32_t cmd;
+ uint32_t data;
+};
+typedef struct xen_domctl_psr_cmt_op xen_domctl_psr_cmt_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
+
+struct xen_domctl {
+ uint32_t cmd;
+#define XEN_DOMCTL_createdomain 1
+#define XEN_DOMCTL_destroydomain 2
+#define XEN_DOMCTL_pausedomain 3
+#define XEN_DOMCTL_unpausedomain 4
+#define XEN_DOMCTL_getdomaininfo 5
+#define XEN_DOMCTL_getmemlist 6
+#define XEN_DOMCTL_getpageframeinfo 7
+#define XEN_DOMCTL_getpageframeinfo2 8
+#define XEN_DOMCTL_setvcpuaffinity 9
+#define XEN_DOMCTL_shadow_op 10
+#define XEN_DOMCTL_max_mem 11
+#define XEN_DOMCTL_setvcpucontext 12
+#define XEN_DOMCTL_getvcpucontext 13
+#define XEN_DOMCTL_getvcpuinfo 14
+#define XEN_DOMCTL_max_vcpus 15
+#define XEN_DOMCTL_scheduler_op 16
+#define XEN_DOMCTL_setdomainhandle 17
+#define XEN_DOMCTL_setdebugging 18
+#define XEN_DOMCTL_irq_permission 19
+#define XEN_DOMCTL_iomem_permission 20
+#define XEN_DOMCTL_ioport_permission 21
+#define XEN_DOMCTL_hypercall_init 22
+#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */
+#define XEN_DOMCTL_settimeoffset 24
+#define XEN_DOMCTL_getvcpuaffinity 25
+#define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */
+#define XEN_DOMCTL_resumedomain 27
+#define XEN_DOMCTL_sendtrigger 28
+#define XEN_DOMCTL_subscribe 29
+#define XEN_DOMCTL_gethvmcontext 33
+#define XEN_DOMCTL_sethvmcontext 34
+#define XEN_DOMCTL_set_address_size 35
+#define XEN_DOMCTL_get_address_size 36
+#define XEN_DOMCTL_assign_device 37
+#define XEN_DOMCTL_bind_pt_irq 38
+#define XEN_DOMCTL_memory_mapping 39
+#define XEN_DOMCTL_ioport_mapping 40
+#define XEN_DOMCTL_pin_mem_cacheattr 41
+#define XEN_DOMCTL_set_ext_vcpucontext 42
+#define XEN_DOMCTL_get_ext_vcpucontext 43
+#define XEN_DOMCTL_set_opt_feature 44 /* Obsolete IA64 only */
+#define XEN_DOMCTL_test_assign_device 45
+#define XEN_DOMCTL_set_target 46
+#define XEN_DOMCTL_deassign_device 47
+#define XEN_DOMCTL_unbind_pt_irq 48
+#define XEN_DOMCTL_set_cpuid 49
+#define XEN_DOMCTL_get_device_group 50
+#define XEN_DOMCTL_set_machine_address_size 51
+#define XEN_DOMCTL_get_machine_address_size 52
+#define XEN_DOMCTL_suppress_spurious_page_faults 53 +#define XEN_DOMCTL_debug_op 54 +#define XEN_DOMCTL_gethvmcontext_partial 55 +#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_mem_sharing_op 57 +#define XEN_DOMCTL_disable_migrate 58 +#define XEN_DOMCTL_gettscinfo 59 +#define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 +#define XEN_DOMCTL_setvcpuextstate 62 +#define XEN_DOMCTL_getvcpuextstate 63 +#define XEN_DOMCTL_set_access_required 64 +#define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_set_broken_page_p2m 67 +#define XEN_DOMCTL_setnodeaffinity 68 +#define XEN_DOMCTL_getnodeaffinity 69 +#define XEN_DOMCTL_set_max_evtchn 70 +#define XEN_DOMCTL_cacheflush 71 +#define XEN_DOMCTL_get_vcpu_msrs 72 +#define XEN_DOMCTL_set_vcpu_msrs 73 +#define XEN_DOMCTL_setvnumainfo 74 +#define XEN_DOMCTL_psr_cmt_op 75 +#define XEN_DOMCTL_arm_configure_domain 76 +#define XEN_DOMCTL_gdbsx_guestmemio 1000 +#define XEN_DOMCTL_gdbsx_pausevcpu 1001 +#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 +#define XEN_DOMCTL_gdbsx_domstatus 1003 + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct xen_domctl_createdomain createdomain; +#if defined(__arm__) || defined(__aarch64__) + struct xen_domctl_arm_configuredomain configuredomain; +#endif + struct xen_domctl_getdomaininfo getdomaininfo; + struct xen_domctl_getmemlist getmemlist; + struct xen_domctl_getpageframeinfo getpageframeinfo; + struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; + struct xen_domctl_nodeaffinity nodeaffinity; + struct xen_domctl_vcpuaffinity vcpuaffinity; + struct xen_domctl_shadow_op shadow_op; + struct xen_domctl_max_mem max_mem; + struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; + struct xen_domctl_max_vcpus max_vcpus; + struct xen_domctl_scheduler_op scheduler_op; + struct xen_domctl_setdomainhandle setdomainhandle; + struct xen_domctl_setdebugging setdebugging; + struct xen_domctl_irq_permission irq_permission; + struct xen_domctl_iomem_permission iomem_permission; + struct xen_domctl_ioport_permission ioport_permission; + struct xen_domctl_hypercall_init hypercall_init; + struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_disable_migrate disable_migrate; + struct xen_domctl_tsc_info tsc_info; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; + struct xen_domctl_assign_device assign_device; + struct xen_domctl_bind_pt_irq bind_pt_irq; + struct xen_domctl_memory_mapping memory_mapping; + struct xen_domctl_ioport_mapping ioport_mapping; + struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; + struct xen_domctl_ext_vcpucontext ext_vcpucontext; + struct xen_domctl_set_target set_target; + struct xen_domctl_subscribe subscribe; + struct xen_domctl_debug_op debug_op; + struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_mem_sharing_op mem_sharing_op; +#if defined(__i386__) || defined(__x86_64__) + struct xen_domctl_cpuid cpuid; + struct xen_domctl_vcpuextstate vcpuextstate; + struct xen_domctl_vcpu_msrs vcpu_msrs; +#endif + struct xen_domctl_set_access_required access_required; + struct xen_domctl_audit_p2m audit_p2m; + struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_set_max_evtchn 
set_max_evtchn; + struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; + struct xen_domctl_cacheflush cacheflush; + struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; + struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; + struct xen_domctl_vnuma vnuma; + struct xen_domctl_psr_cmt_op psr_cmt_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); + +#endif /* __XEN_PUBLIC_DOMCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/elfnote.h xen-4.9.2/extras/mini-os/include/xen/elfnote.h --- xen-4.9.0/extras/mini-os/include/xen/elfnote.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/elfnote.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,281 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicates the fields in the legacy __xen_guest string which + * this note type replaces. + * + * String values (for non-legacy) are NULL terminated ASCII, also known + * as ASCIZ type. + */ + +/* + * NAME=VALUE pair (string). + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). + * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the actual required + * pseudo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field.
This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[extended-cr3]" in the __xen_guest interface.) + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters. If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b. The legacy interface treated the presence or + * absence of this string as a boolean flag rather than requiring "yes" or + * "no".) + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + * This is a numeric value. + * + * Default is 0. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about.
The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* + * Physical entry point into the kernel. + * + * 32bit entry point into the kernel. When requested to launch the + * guest kernel in a HVM container, Xen will use this entry point to + * launch the guest in 32bit protected mode with paging disabled. + * Ignored otherwise. + */ +#define XEN_ELFNOTE_PHYS32_ENTRY 18 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/event_channel.h xen-4.9.2/extras/mini-os/include/xen/event_channel.h --- xen-4.9.0/extras/mini-os/include/xen/event_channel.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/event_channel.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,385 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. 
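To make the note format concrete, here is one way a PV guest binary might emit one of the notes defined above from C. This is an illustrative sketch only (the layout follows the generic ELF note header of namesz/descsz/type, then the padded "Xen" name and the payload), not code shipped in this package:

#include <stdint.h>

/* Sketch: a XEN_ELFNOTE_GUEST_OS note placed in a .note.Xen section.
 * namesz counts "Xen\0"; descsz counts the string payload "linux\0". */
struct xen_elf_note {
    uint32_t namesz, descsz, type;
    char     name[4];            /* "Xen", NUL padded to 4 bytes */
    char     desc[8];            /* payload, padded to 4-byte multiple */
};

static const struct xen_elf_note guest_os_note
    __attribute__((used, section(".note.Xen"), aligned(4))) = {
    .namesz = 4,
    .descsz = 6,
    .type   = XEN_ELFNOTE_GUEST_OS,
    .name   = "Xen",
    .desc   = "linux",
};

The linker collects such objects into a PT_NOTE segment, which is where the domain builder looks for them.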
diff -Nru xen-4.9.0/extras/mini-os/include/xen/event_channel.h xen-4.9.2/extras/mini-os/include/xen/event_channel.h --- xen-4.9.0/extras/mini-os/include/xen/event_channel.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/event_channel.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,385 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include "xen.h" + +/* + * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information; the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). + */ + +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +#define EVTCHNOP_init_control 11 +#define EVTCHNOP_expand_array 12 +#define EVTCHNOP_set_priority 13 +/* ` } */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */ +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * + * In case the peer domain has already tried to set our event channel + * pending, before it was bound, EVTCHNOP_bind_interdomain always sets + * the local event channel pending. + * + * The usual pattern of use, in the guest's upcall (or subsequent + * handler) is as follows: (Re-enable the event channel for subsequent + * signalling and then) check for the existence of whatever condition + * is being waited for by other means, and take whatever action is + * needed (if any). + * + * NOTES: + * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; /* enum virq */ + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>). + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2.
Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> other than + * DOMID_SELF. + * 3. Destroys all control blocks and event array, resets event channel + * operations to 2-level ABI if called with <dom> == DOMID_SELF and FIFO + * ABI was used. Guests should not bind events during EVTCHNOP_reset call + * as these events are likely to be lost. + */ +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + * + * Note: any events that are currently pending will not be resent and + * will be lost. Guests should call this before binding any event to + * avoid losing any events. + */ +struct evtchn_init_control { + /* IN parameters. */ + uint64_t control_gfn; + uint32_t offset; + uint32_t vcpu; + /* OUT parameters. */ + uint8_t link_bits; + uint8_t _pad[7]; +}; +typedef struct evtchn_init_control evtchn_init_control_t; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +struct evtchn_expand_array { + /* IN parameters. */ + uint64_t array_gfn; +}; +typedef struct evtchn_expand_array evtchn_expand_array_t;
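The alloc_unbound and bind_interdomain structures above combine into the usual unbound-port handshake: the offering domain allocates an unbound port, advertises it (typically via xenstore), and the peer then binds its own end. A minimal sketch, assuming a hypothetical evtchn_op(cmd, arg) wrapper around HYPERVISOR_event_channel_op that returns 0 on success:

int evtchn_op(int cmd, void *arg);   /* assumed hypercall wrapper */

evtchn_port_t offer_port(domid_t peer)
{
    struct evtchn_alloc_unbound op = {
        .dom        = DOMID_SELF,
        .remote_dom = peer,
    };

    if ( evtchn_op(EVTCHNOP_alloc_unbound, &op) )
        return 0;            /* 0 used as an error sentinel here */
    return op.port;          /* advertise this port to the peer */
}

evtchn_port_t bind_offered_port(domid_t peer, evtchn_port_t remote_port)
{
    struct evtchn_bind_interdomain op = {
        .remote_dom  = peer,
        .remote_port = remote_port,
    };

    if ( evtchn_op(EVTCHNOP_bind_interdomain, &op) )
        return 0;
    return op.local_port;    /* deliverable end in this domain */
}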
+ +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +struct evtchn_set_priority { + /* IN parameters. */ + uint32_t port; + uint32_t priority; +}; +typedef struct evtchn_set_priority evtchn_set_priority_t; + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superseded by new event_channel_op() hypercall since 0x00030202. + */ +struct evtchn_op { + uint32_t cmd; /* enum event_channel_op */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). */ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + uint32_t ready; + uint32_t _rsvd; + uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; +typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
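The EVTCHN_FIFO_* constants above describe single 32-bit event words, with the link to the next queued port in the low bits and the state flags in the top four bits. The following is a deliberately simplified sketch of how a guest might inspect one word (GCC atomic builtins stand in for the guest's own primitives; the full FIFO protocol also manages the LINKED and BUSY bits):

/* Sketch: test and consume one FIFO event word, per the bit layout above. */
static int consume_event_word(event_word_t *word)
{
    event_word_t w = *word;

    if ( !(w & (1u << EVTCHN_FIFO_PENDING)) ||
         (w & (1u << EVTCHN_FIFO_MASKED)) )
        return 0;

    /* The low LINK bits name the next port in this priority queue:
     * next_port = w & EVTCHN_FIFO_LINK_MASK; */

    /* Clear PENDING before handling so a new event re-latches it. */
    __sync_fetch_and_and(word, ~(event_word_t)(1u << EVTCHN_FIFO_PENDING));
    return 1;
}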
diff -Nru xen-4.9.0/extras/mini-os/include/xen/features.h xen-4.9.2/extras/mini-os/include/xen/features.h --- xen-4.9.0/extras/mini-os/include/xen/features.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/features.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,114 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES + * + * The list of all the features the guest supports. They are set by + * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES + * string. The format is the feature names (as given here without the + * "XENFEAT_" prefix) separated by '|' characters. + * If a feature is required for the kernel to function then the feature name + * must be preceded by a '!' character. + * + * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then + * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0. + */ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space is handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + +/* x86: Does this Xen host support the HVM callback vector type? */ +#define XENFEAT_hvm_callback_vector 8 + +/* x86: pvclock algorithm is safe to use on HVM */ +#define XENFEAT_hvm_safe_pvclock 9 + +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + +/* operation as Dom0 is supported */ +#define XENFEAT_dom0 11 + +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
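Guests normally test these flags at run time by fetching 32-bit submaps with XENVER_get_features: flag N lives in submap N/32, bit N%32 (with XENFEAT_NR_SUBMAPS submaps defined). A sketch, assuming the usual HYPERVISOR_xen_version hypercall wrapper and the xen_feature_info structure from the companion version.h header:

/* Sketch: query one XENFEAT_* bit via XENVER_get_features. */
int xen_feature_present(unsigned int feature)
{
    xen_feature_info_t fi;

    fi.submap_idx = feature / 32;
    if ( HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0 )
        return 0;
    return (fi.submap >> (feature % 32)) & 1;
}

For example, xen_feature_present(XENFEAT_auto_translated_physmap) is how a kernel distinguishes a translated guest from a classic PV one.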
diff -Nru xen-4.9.0/extras/mini-os/include/xen/gcov.h xen-4.9.2/extras/mini-os/include/xen/gcov.h --- xen-4.9.0/extras/mini-os/include/xen/gcov.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/gcov.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,115 @@ +/****************************************************************************** + * gcov.h + * + * Coverage structures exported by Xen. + * The structure is different from the gcc one. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2013, Citrix Systems R&D Ltd. + */ + +#ifndef __XEN_PUBLIC_GCOV_H__ +#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__ + +#define XENCOV_COUNTERS 5 +#define XENCOV_TAG_BASE 0x58544300u +#define XENCOV_TAG_FILE (XENCOV_TAG_BASE+0x46u) +#define XENCOV_TAG_FUNC (XENCOV_TAG_BASE+0x66u) +#define XENCOV_TAG_COUNTER(n) (XENCOV_TAG_BASE+0x30u+((n)&0xfu)) +#define XENCOV_TAG_END (XENCOV_TAG_BASE+0x2eu) +#define XENCOV_IS_TAG_COUNTER(n) \ + ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS)) +#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0)) + +/* + * The main structure for the blob is + * BLOB := FILE.. END + * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS + * FILENAME := LEN characters + * characters are padded to 32 bit + * LEN := 32 bit value + * COUNTERS := TAG_COUNTER(n) NUM COUNTER.. + * NUM := 32 bit value + * COUNTER := 64 bit value + * FUNCTIONS := TAG_FUNC NUM FUNCTION..
+ * FUNCTION := IDENT CHECKSUM NUM_COUNTERS + * + * All tagged structures are aligned to 8 bytes + */ + +/** + * File information + * Prefixed with XENCOV_TAG_FILE and a string with filename + * Aligned to 8 bytes + */ +struct xencov_file +{ + uint32_t tag; /* XENCOV_TAG_FILE */ + uint32_t version; + uint32_t stamp; + uint32_t fn_len; + char filename[1]; +}; + + +/** + * Counters information + * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1) + * Aligned to 8 bytes + */ +struct xencov_counter +{ + uint32_t tag; /* XENCOV_TAG_COUNTER(n) */ + uint32_t num; + uint64_t values[1]; +}; + +/** + * Information for each function + * The number of counters is equal to the number of counter structures + * seen before + */ +struct xencov_function +{ + uint32_t ident; + uint32_t checksum; + uint32_t num_counters[1]; +}; + +/** + * Information for all functions + * Aligned to 8 bytes + */ +struct xencov_functions +{ + uint32_t tag; /* XENCOV_TAG_FUNC */ + uint32_t num; + struct xencov_function xencov_function[1]; +}; + +/** + * Terminator + */ +struct xencov_end +{ + uint32_t tag; /* XENCOV_TAG_END */ +}; + +#endif /* __XEN_PUBLIC_GCOV_H__ */ +
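A consumer walking a coverage blob dispatches on the tag words defined above. A small sketch of that dispatch, using only the macros from this header (bounds checks and the 8-byte record-advance arithmetic are deliberately elided):

/* Sketch: classify one record tag from a XENCOV coverage blob. */
static const char *xencov_tag_name(uint32_t tag)
{
    if ( tag == XENCOV_TAG_FILE )
        return "file";
    if ( tag == XENCOV_TAG_FUNC )
        return "functions";
    if ( tag == XENCOV_TAG_END )
        return "end";
    if ( XENCOV_IS_TAG_COUNTER(tag) )
        return "counter";   /* counter set index: XENCOV_COUNTER_NUM(tag) */
    return "unknown";
}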
diff -Nru xen-4.9.0/extras/mini-os/include/xen/grant_table.h xen-4.9.2/extras/mini-os/include/xen/grant_table.h --- xen-4.9.0/extras/mini-os/include/xen/grant_table.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/grant_table.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,682 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include "xen.h" + +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism for memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granter's memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD) + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2.
+ */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. 
+ */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* ` enum neg_errnoval + * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, + * ` void *args, + * ` unsigned int count) + * ` + * + * @args points to an array of a per-command data structure. The array + * has @count members. + */ + +/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ +#define GNTTABOP_map_grant_ref 0 +#define GNTTABOP_unmap_grant_ref 1 +#define GNTTABOP_setup_table 2 +#define GNTTABOP_dump_table 3 +#define GNTTABOP_transfer 4 +#define GNTTABOP_copy 5 +#define GNTTABOP_query_size 6 +#define GNTTABOP_unmap_and_replace 7 +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +#define GNTTABOP_set_version 8 +#define GNTTABOP_get_status_frames 9 +#define GNTTABOP_get_version 10 +#define GNTTABOP_swap_grant_ref 11 +#define GNTTABOP_cache_flush 12 +#endif /* __XEN_INTERFACE_VERSION__ */ +/* ` } */ + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref.
If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle>. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + XEN_GUEST_HANDLE(ulong) frame_list; +#else + XEN_GUEST_HANDLE(xen_pfn_t) frame_list; +#endif +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); + +/* + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <transfer_ref>. + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
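The map and unmap operations above always pair up: the handle returned by GNTTABOP_map_grant_ref is what GNTTABOP_unmap_grant_ref later consumes. A sketch of that pairing for a host-CPU mapping, assuming the usual HYPERVISOR_grant_table_op(cmd, args, count) hypercall wrapper and a pre-allocated virtual address va:

/* Sketch: map a foreign grant at va, use it, then unmap it again. */
static int with_mapped_grant(domid_t dom, grant_ref_t ref, uint64_t va)
{
    struct gnttab_map_grant_ref map = {
        .host_addr = va,
        .flags     = GNTMAP_host_map,
        .ref       = ref,
        .dom       = dom,
    };
    struct gnttab_unmap_grant_ref unmap = { .host_addr = va };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1) )
        return -1;                  /* the hypercall itself failed */
    if ( map.status )               /* => enum grant_status */
        return map.status;

    /* ... access the granted frame through va ... */

    unmap.handle = map.handle;      /* dev_bus_addr stays 0: ignored */
    HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
    return unmap.status;
}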
+ + +/* + * GNTTABOP_copy: Hypervisor based copy. + * Source and destinations can be either MFNs or, for foreign domains, + * grant references. The foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * granted appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_copy gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by <handle> but atomically replace the page table entry with one + * pointing to the machine address under <new_addr>. <dev_bus_addr> will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * <nr_frames> specify the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
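Putting gnttab_copy to work: the sketch below copies len bytes out of a foreign grant into a local frame, with GNTCOPY_source_gref marking which side of the operation is grant-addressed. HYPERVISOR_grant_table_op is again the assumed hypercall wrapper, and len must stay within a single frame:

/* Sketch: hypervisor-mediated copy from a foreign grant. */
static int16_t copy_from_grant(domid_t dom, grant_ref_t ref,
                               xen_pfn_t local_gmfn, uint16_t len)
{
    struct gnttab_copy op = {
        .source.u.ref = ref,
        .source.domid = dom,
        .dest.u.gmfn  = local_gmfn,
        .dest.domid   = DOMID_SELF,
        .len          = len,
        .flags        = GNTCOPY_source_gref,   /* dest is a plain gmfn */
    };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_copy, &op, 1) )
        return -1;
    return op.status;     /* 0 (GNTST_okay) on success */
}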
+ +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain <dom>. + */ +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +/* + * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. + */ +struct gnttab_swap_grant_ref { + /* IN parameters */ + grant_ref_t ref_a; + grant_ref_t ref_b; + /* OUT parameters */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); + +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint16_t offset; /* offset from start of grant */ + uint16_t length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1<<0) +#define GNTTAB_CACHE_INVAL (1<<1) +#define GNTTAB_CACHE_SOURCE_GREF (1<<31) + uint32_t op; +}; +typedef struct gnttab_cache_flush gnttab_cache_flush_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine address of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + +/* + * Values for error status returns. All errors are -ve. + */ +/* ` enum grant_status { */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognised domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large.
*/ +#define GNTST_eagain (-12) /* Operation not done; try again. */ +/* ` } */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/e820.h xen-4.9.2/extras/mini-os/include/xen/hvm/e820.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/e820.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/e820.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,34 @@ + +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_E820_H__ +#define __XEN_PUBLIC_HVM_E820_H__ + +/* E820 location in HVM virtual address space. */ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + +#endif /* __XEN_PUBLIC_HVM_E820_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_info_table.h xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_info_table.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_info_table.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_info_table.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,72 @@ +/****************************************************************************** + * hvm/hvm_info_table.h + * + * HVM parameter and information table, written into guest memory map. 
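Before moving on to the HVM headers: the GNTST_* codes and the GNTTABOP_error_msgs table in grant_table.h above line up by index (status -N maps to entry N), so a status can be turned into a message with a bounds-checked lookup. A minimal sketch:

    /* Sketch: map a (non-positive) GNTST_* status to its message string. */
    static const char *const gnttab_msgs[] = GNTTABOP_error_msgs;

    static const char *gntst_strerror(int16_t status)
    {
        unsigned int idx = (unsigned int)-status; /* GNTST_okay..GNTST_eagain => 0..12 */

        if ( status > 0 || idx >= sizeof(gnttab_msgs) / sizeof(gnttab_msgs[0]) )
            return "invalid status code";
        return gnttab_msgs[idx];
    }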
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ +#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ + +#define HVM_INFO_PFN 0x09F +#define HVM_INFO_OFFSET 0x800 +#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) + +/* Maximum we can support with current vLAPIC ID mapping. */ +#define HVM_MAX_VCPUS 128 + +struct hvm_info_table { + char signature[8]; /* "HVM INFO" */ + uint32_t length; + uint8_t checksum; + + /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ + uint8_t apic_mode; + + /* How many CPUs does this domain have? */ + uint32_t nr_vcpus; + + /* + * MEMORY MAP provided by HVM domain builder. + * Notes: + * 1. page_to_phys(x) = x << 12 + * 2. If a field is zero, the corresponding range does not exist. + */ + /* + * 0x0 to page_to_phys(low_mem_pgend)-1: + * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) + */ + uint32_t low_mem_pgend; + /* + * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: + * Reserved for special memory mappings + */ + uint32_t reserved_mem_pgstart; + /* + * 0x100000000 to page_to_phys(high_mem_pgend)-1: + * RAM above 4GB + */ + uint32_t high_mem_pgend; + + /* Bitmap of which CPUs are online at boot time. */ + uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; +}; + +#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_op.h xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_op.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_op.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_op.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,402 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +#include "../xen.h" +#include "../trace.h" +#include "../event_channel.h" + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ + HVMMEM_mmio_write_dm /* Read-only; writes go to the device model */ +} hvmmem_type_t; + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Track dirty VRAM. */ +#define HVMOP_track_dirty_vram 6 +struct xen_hvm_track_dirty_vram { + /* Domain to be tracked. */ + domid_t domid; + /* Number of pages to track. */ + uint32_t nr; + /* First pfn to track. */ + uint64_aligned_t first_pfn; + /* OUT variable. */ + /* Dirty bitmap buffer. */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; +}; +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); + +/* Notify that some pages got modified by the Device Model. */ +#define HVMOP_modified_memory 7 +struct xen_hvm_modified_memory { + /* Domain to be updated. */ + domid_t domid; + /* Number of pages. */ + uint32_t nr; + /* First pfn. */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); + +#define HVMOP_set_mem_type 8 +/* Notify that a region of memory is to be treated in a specific way. 
*/ +struct xen_hvm_set_mem_type { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_type; + /* Number of pages. */ + uint32_t nr; + /* First pfn. */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + uint16_t pad[3]; /* align next field on 8-byte boundary */ + /* guest physical address of the toplevel pagetable dying */ + uint64_t gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); + +/* Get the current Xen time, in nanoseconds since system boot. */ +#define HVMOP_get_time 10 +struct xen_hvm_get_time { + uint64_t now; /* OUT */ +}; +typedef struct xen_hvm_get_time xen_hvm_get_time_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); + +#define HVMOP_xentrace 11 +struct xen_hvm_xentrace { + uint16_t event, extra_bytes; + uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; +}; +typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Deprecated by XENMEM_access_op_set_access */ +#define HVMOP_set_mem_access 12 + +/* Deprecated by XENMEM_access_op_get_access */ +#define HVMOP_get_mem_access 13 + +#define HVMOP_inject_trap 14 +/* Inject a trap into a VCPU, which will get taken up on the next + * scheduling of it. Note that the caller should know enough of the + * state of the CPU before injecting, to know what the effect of + * injecting the trap will be. + */ +struct xen_hvm_inject_trap { + /* Domain to be queried. */ + domid_t domid; + /* VCPU */ + uint32_t vcpuid; + /* Vector number */ + uint32_t vector; + /* Trap type (HVMOP_TRAP_*) */ + uint32_t type; +/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */ +# define HVMOP_TRAP_ext_int 0 /* external interrupt */ +# define HVMOP_TRAP_nmi 2 /* nmi */ +# define HVMOP_TRAP_hw_exc 3 /* hardware exception */ +# define HVMOP_TRAP_sw_int 4 /* software interrupt (CD nn) */ +# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */ +# define HVMOP_TRAP_sw_exc 6 /* INT3 (CC), INTO (CE) */ + /* Error code, or ~0u to skip */ + uint32_t error_code; + /* Intruction length */ + uint32_t insn_len; + /* CR2 for page faults */ + uint64_aligned_t cr2; +}; +typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + +/* Following tools-only interfaces may change in future. 
*/
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* MSI injection for emulated devices */
+#define HVMOP_inject_msi 16
+struct xen_hvm_inject_msi {
+ /* Domain to be injected */
+ domid_t domid;
+ /* Data -- lower 32 bits */
+ uint32_t data;
+ /* Address (0xfeexxxxx) */
+ uint64_t addr;
+};
+typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
+
+/*
+ * IOREQ Servers
+ *
+ * The interface between an I/O emulator and Xen is called an IOREQ Server.
+ * A domain supports a single 'legacy' IOREQ Server which is instantiated if
+ * parameter...
+ *
+ * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous
+ * ioreq structures), or...
+ * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered
+ * ioreq ring), or...
+ * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses
+ * to request buffered I/O emulation).
+ *
+ * The following hypercalls facilitate the creation of IOREQ Servers for
+ * 'secondary' emulators which are invoked to implement port I/O, memory, or
+ * PCI config space ranges which they explicitly register.
+ */
+
+typedef uint16_t ioservid_t;
+
+/*
+ * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary
+ * emulator servicing domain <domid>.
+ *
+ * The <id> handed back is unique for <domid>. If <handle_bufioreq> is zero
+ * the buffered ioreq ring will not be allocated and hence all emulation
+ * requests to this server will be synchronous.
+ */
+#define HVMOP_create_ioreq_server 17
+struct xen_hvm_create_ioreq_server {
+ domid_t domid; /* IN - domain to be serviced */
+ uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
+ ioservid_t id; /* OUT - server id */
+};
+typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_create_ioreq_server_t);
+
+/*
+ * HVMOP_get_ioreq_server_info: Get all the information necessary to access
+ * IOREQ Server <id>.
+ *
+ * The emulator needs to map the synchronous ioreq structures and buffered
+ * ioreq ring (if it exists) that Xen uses to request emulation. These are
+ * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
+ * respectively. In addition, if the IOREQ Server is handling buffered
+ * emulation requests, the emulator needs to bind to event channel
+ * <bufioreq_port> to listen for them. (The event channels used for
+ * synchronous emulation requests are specified in the per-CPU ioreq
+ * structures in <ioreq_pfn>).
+ * If the IOREQ Server is not handling buffered emulation requests then the
+ * values handed back in <bufioreq_pfn> and <bufioreq_port> will both be 0.
+ */
+#define HVMOP_get_ioreq_server_info 18
+struct xen_hvm_get_ioreq_server_info {
+ domid_t domid; /* IN - domain to be serviced */
+ ioservid_t id; /* IN - server id */
+ evtchn_port_t bufioreq_port; /* OUT - buffered ioreq port */
+ uint64_aligned_t ioreq_pfn; /* OUT - sync ioreq pfn */
+ uint64_aligned_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
+};
+typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t);
+
+/*
+ * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain <domid>
+ * for emulation by the client of IOREQ
+ * Server <id>
+ * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of <domid>
+ * for emulation by the client of IOREQ
+ * Server <id>
+ *
+ * There are three types of I/O that can be emulated: port I/O, memory accesses
+ * and PCI config space accesses. The <type> field denotes which type of range
+ * the <start> and <end> (inclusive) fields are specifying.
+ * PCI config space ranges are specified by segment/bus/device/function values
+ * which should be encoded using the HVMOP_PCI_SBDF helper macro below.
+ *
+ * NOTE: unless an emulation request falls entirely within a range mapped
+ * by a secondary emulator, it will not be passed to that emulator.
+ */
+#define HVMOP_map_io_range_to_ioreq_server 19
+#define HVMOP_unmap_io_range_from_ioreq_server 20
+struct xen_hvm_io_range {
+ domid_t domid; /* IN - domain to be serviced */
+ ioservid_t id; /* IN - server id */
+ uint32_t type; /* IN - type of range */
+# define HVMOP_IO_RANGE_PORT 0 /* I/O port range */
+# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
+# define HVMOP_IO_RANGE_PCI 2 /* PCI segment/bus/dev/func range */
+ uint64_aligned_t start, end; /* IN - inclusive start and end of range */
+};
+typedef struct xen_hvm_io_range xen_hvm_io_range_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_io_range_t);
+
+#define HVMOP_PCI_SBDF(s,b,d,f) \
+ ((((s) & 0xffff) << 16) | \
+ (((b) & 0xff) << 8) | \
+ (((d) & 0x1f) << 3) | \
+ ((f) & 0x07))
+
+/*
+ * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id> servicing domain
+ * <domid>.
+ *
+ * Any registered I/O ranges will be automatically deregistered.
+ */
+#define HVMOP_destroy_ioreq_server 21
+struct xen_hvm_destroy_ioreq_server {
+ domid_t domid; /* IN - domain to be serviced */
+ ioservid_t id; /* IN - server id */
+};
+typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_destroy_ioreq_server_t);
+
+/*
+ * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id>
+ * servicing domain <domid>.
+ *
+ * The IOREQ Server will not be passed any emulation requests until it is in the
+ * enabled state.
+ * Note that the contents of the ioreq_pfn and bufioreq_pfn (see
+ * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in
+ * the enabled state.
+ */
+#define HVMOP_set_ioreq_server_state 22
+struct xen_hvm_set_ioreq_server_state {
+ domid_t domid; /* IN - domain to be serviced */
+ ioservid_t id; /* IN - server id */
+ uint8_t enabled; /* IN - enabled? */
+};
+typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * HVMOP_set_evtchn_upcall_vector: Set a <vector> that should be used for event
+ * channel upcalls on the specified <vcpu>. If set,
+ * this vector will be used in preference to the
+ * domain global callback via (see
+ * HVM_PARAM_CALLBACK_IRQ).
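Taken together, a secondary emulator brings an IOREQ Server up in the order create, query, map ranges, enable. In the sketch below, do_hvm_op() is an illustrative stand-in for issuing HVMOP_* hypercalls (in practice this goes through privileged toolstack/libxc plumbing), and the PCI range shows HVMOP_PCI_SBDF at work: segment 0, bus 3, device 0x1f, function 7 encodes to 0x3ff:

    /* Sketch: bring up a secondary IOREQ Server for guest 'domid'. */
    struct xen_hvm_create_ioreq_server create = {
        .domid = domid, .handle_bufioreq = 1 /* allocate the buffered ring */
    };
    do_hvm_op(HVMOP_create_ioreq_server, &create);

    struct xen_hvm_get_ioreq_server_info info = {
        .domid = domid, .id = create.id
    };
    do_hvm_op(HVMOP_get_ioreq_server_info, &info); /* pfns to map, port to bind */

    /* Claim config space of PCI device 0000:03:1f.7 (SBDF value 0x3ff). */
    struct xen_hvm_io_range range = {
        .domid = domid, .id = create.id, .type = HVMOP_IO_RANGE_PCI,
        .start = HVMOP_PCI_SBDF(0, 3, 0x1f, 7),
        .end = HVMOP_PCI_SBDF(0, 3, 0x1f, 7)
    };
    do_hvm_op(HVMOP_map_io_range_to_ioreq_server, &range);

    struct xen_hvm_set_ioreq_server_state state = {
        .domid = domid, .id = create.id, .enabled = 1 /* start receiving */
    };
    do_hvm_op(HVMOP_set_ioreq_server_state, &state);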
+ */ +#define HVMOP_set_evtchn_upcall_vector 23 +struct xen_hvm_evtchn_upcall_vector { + uint32_t vcpu; + uint8_t vector; +}; +typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t); + +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_xs_strings.h xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_xs_strings.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/hvm_xs_strings.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/hvm_xs_strings.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,80 @@ +/****************************************************************************** + * hvm/hvm_xs_strings.h + * + * HVM xenstore strings used in HVMLOADER. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ +#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ + +#define HVM_XS_HVMLOADER "hvmloader" +#define HVM_XS_BIOS "hvmloader/bios" +#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address" +#define HVM_XS_ALLOW_MEMORY_RELOCATE "hvmloader/allow-memory-relocate" + +/* The following values allow additional ACPI tables to be added to the + * virtual ACPI BIOS that hvmloader constructs. The values specify the guest + * physical address and length of a block of ACPI tables to add. The format of + * the block is simply concatenated raw tables (which specify their own length + * in the ACPI header). + */ +#define HVM_XS_ACPI_PT_ADDRESS "hvmloader/acpi/address" +#define HVM_XS_ACPI_PT_LENGTH "hvmloader/acpi/length" + +/* Any number of SMBIOS types can be passed through to an HVM guest using + * the following xenstore values. The values specify the guest physical + * address and length of a block of SMBIOS structures for hvmloader to use. + * The block is formatted in the following way: + * + * ... + * + * Each length separator is a 32b integer indicating the length of the next + * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct + * will replace the default structure in hvmloader. In addition, any + * OEM/vendortypes (128 - 255) will all be added. 
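The pass-through block described above is a plain byte stream of (32-bit length, raw structure) pairs. A minimal builder might look like the sketch below (smbios_blob_append() is illustrative, not part of this header; the finished blob's guest-physical address and total length are what get published under the xenstore keys defined just below):

    /* Sketch: append one raw SMBIOS structure to the pass-through block. */
    #include <stdint.h>
    #include <string.h>

    static size_t smbios_blob_append(uint8_t *blob, size_t used,
                                     const void *tbl, uint32_t tbl_len)
    {
        memcpy(blob + used, &tbl_len, sizeof(tbl_len));       /* length separator */
        memcpy(blob + used + sizeof(tbl_len), tbl, tbl_len);  /* the structure */
        return used + sizeof(tbl_len) + tbl_len;
    }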
+ */ +#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address" +#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length" + +/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */ +#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery" + +/* The following xenstore values are used to override some of the default + * string values in the SMBIOS table constructed in hvmloader. + */ +#define HVM_XS_BIOS_STRINGS "bios-strings" +#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor" +#define HVM_XS_BIOS_VERSION "bios-strings/bios-version" +#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer" +#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name" +#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version" +#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number" +#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer" +#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number" +#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer" +#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name" + +/* 1 to 99 OEM strings can be set in xenstore using values of the form + * below. These strings will be loaded into the SMBIOS type 11 structure. + */ +#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" + +#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/ioreq.h xen-4.9.2/extras/mini-os/include/xen/hvm/ioreq.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/ioreq.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/ioreq.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,129 @@ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq. 
+ * + * For I/O type IOREQ_TYPE_PCI_CONFIG, the physical address is formatted + * as follows: + * + * 63....48|47..40|39..35|34..32|31........0 + * SEGMENT |BUS |DEV |FN |OFFSET + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t _pad1:1; + uint8_t type; /* I/O type */ +}; +typedef struct ioreq ioreq_t; + +struct shared_iopage { + struct ioreq vcpu_ioreq[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +struct buf_ioreq { + uint8_t type; /* I/O type */ + uint8_t pad:1; + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ + uint32_t addr:20;/* physical address */ + uint32_t data; /* data */ +}; +typedef struct buf_ioreq buf_ioreq_t; + +#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ +struct buffered_iopage { + unsigned int read_pointer; + unsigned int write_pointer; + buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. */ +typedef struct buffered_iopage buffered_iopage_t; + +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* Version 0 (default): Traditional Xen locations. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). 
*/ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/params.h xen-4.9.2/extras/mini-os/include/xen/hvm/params.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/params.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/params.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,199 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include "hvm_op.h" + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 + +#if defined(__i386__) || defined(__x86_64__) + +/* + * Viridian enlightenments + * + * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx) + * + * To expose viridian enlightenments to the guest set this parameter + * to the desired feature mask. The base feature set must be present + * in any valid feature mask. 
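As a sketch of what a toolstack-side caller might do with this parameter, using the HVMPV_* bits defined just below (do_hvm_op() is again an illustrative stand-in for the HVMOP_set_param plumbing; libxl/libxc normally do this):

    /* Sketch: expose base + time-related enlightenments to an HVM guest. */
    struct xen_hvm_param p = {
        .domid = domid, /* placeholder */
        .index = HVM_PARAM_VIRIDIAN, /* defined just below */
        .value = HVMPV_base_freq | HVMPV_time_ref_count | HVMPV_reference_tsc,
    };
    do_hvm_op(HVMOP_set_param, &p); /* HVMPV_base_freq must always be set */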
+ */ +#define HVM_PARAM_VIRIDIAN 9 + +/* Base+Freq viridian feature sets: + * + * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) + * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) + * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX) + * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY) + */ +#define _HVMPV_base_freq 0 +#define HVMPV_base_freq (1 << _HVMPV_base_freq) + +/* Feature set modifications */ + +/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY). + * This modification restores the viridian feature set to the + * original 'base' set exposed in releases prior to Xen 4.4. + */ +#define _HVMPV_no_freq 1 +#define HVMPV_no_freq (1 << _HVMPV_no_freq) + +/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */ +#define _HVMPV_time_ref_count 2 +#define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) + +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ +#define _HVMPV_reference_tsc 3 +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) + +#define HVMPV_feature_mask \ + (HVMPV_base_freq | \ + HVMPV_no_freq | \ + HVMPV_time_ref_count | \ + HVMPV_reference_tsc) + +#endif + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +/* + * Select location of ACPI PM1a and TMR control blocks. 
Currently two locations + * are supported, specified by version 0 or 1 in this parameter: + * - 0: default, use the old addresses + * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 + * - 1: use the new default qemu addresses + * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 + * You can find these address definitions in + */ +#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 + +/* Enable blocking memory events, async or sync (pause vcpu until response) + * onchangeonly indicates messages only on a change of value */ +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 +#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 +#define HVM_PARAM_MEMORY_EVENT_MSR 30 + +#define HVMPME_MODE_MASK (3 << 0) +#define HVMPME_mode_disabled 0 +#define HVMPME_mode_async 1 +#define HVMPME_mode_sync 2 +#define HVMPME_onchangeonly (1 << 2) + +/* Boolean: Enable nestedhvm (hvm only) */ +#define HVM_PARAM_NESTEDHVM 24 + +/* Params for the mem event rings */ +#define HVM_PARAM_PAGING_RING_PFN 27 +#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_SHARING_RING_PFN 29 + +/* SHUTDOWN_* action in case of a triple fault */ +#define HVM_PARAM_TRIPLE_FAULT_REASON 31 + +#define HVM_PARAM_IOREQ_SERVER_PFN 32 +#define HVM_PARAM_NR_IOREQ_SERVER_PAGES 33 + +/* Location of the VM Generation ID in guest physical address space. */ +#define HVM_PARAM_VM_GENERATION_ID_ADDR 34 + +#define HVM_NR_PARAMS 35 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/pvdrivers.h xen-4.9.2/extras/mini-os/include/xen/hvm/pvdrivers.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/pvdrivers.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/pvdrivers.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,49 @@ +/* + * pvdrivers.h: Register of PV drivers product numbers. + * Copyright (c) 2012, Citrix Systems Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_PVDRIVERS_H_ +#define _XEN_PUBLIC_PVDRIVERS_H_ + +/* + * This is the master registry of product numbers for + * PV drivers. + * If you need a new product number allocating, please + * post to xen-devel@lists.xenproject.org. You should NOT use + * a product number without allocating one. 
+ * If you maintain a separate versioning and distribution path + * for PV drivers you should have a separate product number so + * that your drivers can be separated from others. + * + * During development, you may use the product ID to + * indicate a driver which is yet to be released. + */ + +#define PVDRIVERS_PRODUCT_LIST(EACH) \ + EACH("xensource-windows", 0x0001) /* Citrix */ \ + EACH("gplpv-windows", 0x0002) /* James Harper */ \ + EACH("linux", 0x0003) \ + EACH("xenserver-windows-v7.0+", 0x0004) /* Citrix */ \ + EACH("xenserver-windows-v7.2+", 0x0005) /* Citrix */ \ + EACH("experimental", 0xffff) + +#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/hvm/save.h xen-4.9.2/extras/mini-os/include/xen/hvm/save.h --- xen-4.9.0/extras/mini-os/include/xen/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/hvm/save.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,111 @@ +/* + * hvm/save.h + * + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_H__ +#define __XEN_PUBLIC_HVM_SAVE_H__ + +/* + * Structures in this header *must* have the same layout in 32bit + * and 64bit environments: this means that all fields must be explicitly + * sized types and aligned to their sizes, and the structs must be + * a multiple of eight bytes long. + * + * Only the state necessary for saving and restoring (i.e. fields + * that are analogous to actual hardware state) should go in this file. + * Internal mechanisms should be kept in Xen-private headers. + */ + +#if !defined(__GNUC__) || defined(__STRICT_ANSI__) +#error "Anonymous structs/unions are a GNU extension." +#endif + +/* + * Each entry is preceded by a descriptor giving its type and length + */ +struct hvm_save_descriptor { + uint16_t typecode; /* Used to demux the various types below */ + uint16_t instance; /* Further demux within a type */ + uint32_t length; /* In bytes, *not* including this descriptor */ +}; + + +/* + * Each entry has a datatype associated with it: for example, the CPU state + * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), + * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). + * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system + * ugliness. 
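Concretely, a consumer of a save image walks (descriptor, payload) pairs until it reaches the zero-type, zero-length END marker noted below. A sketch, assuming the image sits in a flat buffer and handle_record() is a placeholder:

    /* Sketch: walk HVM save records; stop at the zero/zero END descriptor. */
    const uint8_t *cur = buf, *end = buf + buf_len;

    while ( cur + sizeof(struct hvm_save_descriptor) <= end )
    {
        const struct hvm_save_descriptor *desc = (const void *)cur;

        if ( desc->typecode == 0 && desc->length == 0 )
            break; /* end of image */
        /* desc->length does not include the descriptor itself */
        handle_record(desc->typecode, desc->instance,
                      cur + sizeof(*desc), desc->length);
        cur += sizeof(*desc) + desc->length;
    }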
+ */
+
+#ifdef __XEN__
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \
+ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \
+ struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; }
+
+# include <xen/lib.h> /* BUG() */
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \
+ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \
+ struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; }
+#else
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}
+
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}
+#endif
+
+#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t)
+#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x)))
+#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c))
+
+#ifdef __XEN__
+# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t)
+# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x)))
+
+# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1)
+# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst)
+#endif
+
+/*
+ * The series of save records is terminated by a zero-type, zero-length
+ * descriptor.
+ */
+
+struct hvm_save_end {};
+DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../arch-x86/hvm/save.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "../arch-arm/hvm/save.h"
+#else
+#error "unsupported architecture"
+#endif
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/blkif.h xen-4.9.2/extras/mini-os/include/xen/io/blkif.h
--- xen-4.9.0/extras/mini-os/include/xen/io/blkif.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.9.2/extras/mini-os/include/xen/io/blkif.h 2017-02-22 13:09:16.000000000 +0000
@@ -0,0 +1,640 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * + * Copyright (c) 2003-2004, Keir Fraser + * Copyright (c) 2012, Spectra Logic Corporation + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen block driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * All data in the XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + * XenStore nodes marked "DEPRECATED" in their notes section should only be + * used to provide interoperability with legacy implementations. + * + * See the XenBus state transition diagram below for details on when XenBus + * nodes must be published and when they can be queried. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * mode + * Values: "r" (read only), "w" (writable) + * + * The read or write access permissions to the backing store to be + * granted to the frontend. + * + * params + * Values: string + * + * A free formatted string providing sufficient information for the + * backend driver to open the backing device. (e.g. the path to the + * file or block device representing the backing store.) + * + * type + * Values: "file", "phy", "tap" + * + * The type of the backing device/object. + * + * + * direct-io-safe + * Values: 0/1 (boolean) + * Default Value: 0 + * + * The underlying storage is not affected by the direct IO memory + * lifetime bug. See: + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html + * + * Therefore this option gives the backend permission to use + * O_DIRECT, notwithstanding that bug. + * + * That is, if this option is enabled, use of O_DIRECT is safe, + * in circumstances where we would normally have avoided it as a + * workaround for that bug. This option is not relevant for all + * backends, and even not necessarily supported for those for + * which it is relevant. 
A backend which knows that it is not + * affected by the bug can ignore this option. + * + * This option doesn't require a backend to use O_DIRECT, so it + * should not be used to try to control the caching behaviour. + * + *--------------------------------- Features --------------------------------- + * + * feature-barrier + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-flush-cache + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-discard + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_DISCARD request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * + *----------------------- Request Transport Parameters ------------------------ + * + * max-ring-page-order + * Values: + * Default Value: 0 + * Notes: 1, 3 + * + * The maximum supported size of the request ring buffer in units of + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * max-ring-pages + * Values: + * Default Value: 1 + * Notes: DEPRECATED, 2, 3 + * + * The maximum supported size of the request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *------------------------- Backend Device Properties ------------------------- + * + * discard-enable + * Values: 0/1 (boolean) + * Default Value: 1 + * + * This optional property, set by the toolstack, instructs the backend + * to offer discard to the frontend. 
If the property is missing the + * backend should offer discard if the backing storage actually supports + * it. This optional property, set by the toolstack, requests that the + * backend offer, or not offer, discard to the frontend. + * + * discard-alignment + * Values: + * Default Value: 0 + * Notes: 4, 5 + * + * The offset, in bytes from the beginning of the virtual block device, + * to the first, addressable, discard extent on the underlying device. + * + * discard-granularity + * Values: + * Default Value: <"sector-size"> + * Notes: 4 + * + * The size, in bytes, of the individually addressable discard extents + * of the underlying device. + * + * discard-secure + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 10 + * + * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD + * requests with the BLKIF_DISCARD_SECURE flag set. + * + * info + * Values: (bitmap) + * + * A collection of bit flags describing attributes of the backing + * device. The VDISK_* macros define the meaning of each bit + * location. + * + * sector-size + * Values: + * + * The logical sector size, in bytes, of the backend device. + * + * physical-sector-size + * Values: + * + * The physical sector size, in bytes, of the backend device. + * + * sectors + * Values: + * + * The size of the backend device, expressed in units of its logical + * sector size ("sector-size"). + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: + * Notes: 6 + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * ring-ref%u + * Values: + * Notes: 6 + * + * For a frontend providing a multi-page ring, a "number of ring pages" + * sized list of nodes, each containing a Xen grant reference granting + * permission for the backend to map the page of the ring located + * at page index "%u". Page indexes are zero based. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * ring-page-order + * Values: + * Default Value: 0 + * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) + * Notes: 1, 3 + * + * The size of the frontend allocated request ring buffer in units + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * num-ring-pages + * Values: + * Default Value: 1 + * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) + * Notes: DEPRECATED, 2, 3 + * + * The size of the frontend allocated request ring buffer in units of + * machine pages. The value must be a power of 2. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. 
This will cause a performance degradation, + * since the the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + * + *------------------------- Virtual Device Properties ------------------------- + * + * device-type + * Values: "disk", "cdrom", "floppy", etc. + * + * virtual-device + * Values: + * + * A value indicating the physical device to virtualize within the + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI + * disk", etc.) + * + * See docs/misc/vbd-interface.txt for details on the format of this + * value. + * + * Notes + * ----- + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer + * PV drivers. + * (2) Multi-page ring buffer scheme first used in some RedHat distributions + * including a distribution deployed on certain nodes of the Amazon + * EC2 cluster. + * (3) Support for multi-page ring buffers was implemented independently, + * in slightly different forms, by both Citrix and RedHat/Amazon. + * For full interoperability, block front and backends should publish + * identical ring parameters, adjusted for unit differences, to the + * XenStore nodes used in both schemes. + * (4) Devices that support discard functionality may internally allocate space + * (discardable extents) in units that are larger than the exported logical + * block size. If the backing device has such discardable extents the + * backend should provide both discard-granularity and discard-alignment. + * Providing just one of the two may be considered an error by the frontend. + * Backends supporting discard should include discard-granularity and + * discard-alignment even if it supports discarding individual sectors. + * Frontends should assume discard-alignment == 0 and discard-granularity + * == sector size if these keys are missing. + * (5) The discard-alignment parameter allows a physical device to be + * partitioned into virtual devices that do not necessarily begin or + * end on a discardable extent boundary. + * (6) When there is only a single page allocated to the request ring, + * 'ring-ref' is used to communicate the grant reference for this + * page to the backend. When using a multi-page ring, the 'ring-ref' + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. 
+ *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. 
If + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the + * discarded region on the device must be rendered unrecoverable before the + * command returns. + * + * This operation is analogous to performing a trim (ATA) or unmap (SCSI) + * command on a native device. + * + * More information about trim/unmap operations can be found at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + * + * Optional. See "feature-discard", "discard-alignment", + * "discard-granularity", and "discard-secure" in the XenBus node + * documentation above. + */ +#define BLKIF_OP_DISCARD 5 + +/* + * Recognized if "feature-max-indirect-segments" is present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +/* + * Maximum number of indirect pages to use per request. + */ +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +/* + * NB. first_sect and last_sect in blkif_request_segment, as well as + * sector_number in blkif_request, are always expressed in 512-byte units. + * However they must be properly aligned to the real sector size of the + * physical disk, which is reported in the "physical-sector-size" node in + * the backend xenbus info. Also the xenbus "sectors" node is expressed in + * 512-byte units. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + uint8_t operation; /* BLKIF_OP_???
*/ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + uint8_t operation; /* BLKIF_OP_DISCARD */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to discard*/ +}; +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_request_indirect { + uint8_t operation; /* BLKIF_OP_INDIRECT */ + uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ + uint16_t nr_segments; /* number of segments */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + uint64_t pad; /* Make it 64 byte aligned on i386 */ +#endif +}; +typedef struct blkif_request_indirect blkif_request_indirect_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/console.h xen-4.9.2/extras/mini-os/include/xen/io/console.h --- xen-4.9.0/extras/mini-os/include/xen/io/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/console.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,51 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/fbif.h xen-4.9.2/extras/mini-os/include/xen/io/fbif.h --- xen-4.9.0/extras/mini-os/include/xen/io/fbif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/fbif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,176 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. 
+ */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend of its preferred refresh + * period. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodic refreshes. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_REFRESH_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs for 32 bit and 2 Megs + * for 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240.
Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +/* + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. + */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/fsif.h xen-4.9.2/extras/mini-os/include/xen/io/fsif.h --- xen-4.9.0/extras/mini-os/include/xen/io/fsif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/fsif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,192 @@ +/****************************************************************************** + * fsif.h + * + * Interface to FS level split device drivers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007, Grzegorz Milos, . + */ + +#ifndef __XEN_PUBLIC_IO_FSIF_H__ +#define __XEN_PUBLIC_IO_FSIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +#define REQ_FILE_OPEN 1 +#define REQ_FILE_CLOSE 2 +#define REQ_FILE_READ 3 +#define REQ_FILE_WRITE 4 +#define REQ_STAT 5 +#define REQ_FILE_TRUNCATE 6 +#define REQ_REMOVE 7 +#define REQ_RENAME 8 +#define REQ_CREATE 9 +#define REQ_DIR_LIST 10 +#define REQ_CHMOD 11 +#define REQ_FS_SPACE 12 +#define REQ_FILE_SYNC 13 + +struct fsif_open_request { + grant_ref_t gref; +}; + +struct fsif_close_request { + uint32_t fd; +}; + +struct fsif_read_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_write_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_stat_request { + uint32_t fd; +}; + +/* This structure is a copy of some fields from stat structure, returned + * via the ring. 
*/ +struct fsif_stat_response { + int32_t stat_mode; + uint32_t stat_uid; + uint32_t stat_gid; + int32_t stat_ret; + int64_t stat_size; + int64_t stat_atime; + int64_t stat_mtime; + int64_t stat_ctime; +}; + +struct fsif_truncate_request { + uint32_t fd; + int32_t pad; + int64_t length; +}; + +struct fsif_remove_request { + grant_ref_t gref; +}; + +struct fsif_rename_request { + uint16_t old_name_offset; + uint16_t new_name_offset; + grant_ref_t gref; +}; + +struct fsif_create_request { + int8_t directory; + int8_t pad; + int16_t pad2; + int32_t mode; + grant_ref_t gref; +}; + +struct fsif_list_request { + uint32_t offset; + grant_ref_t gref; +}; + +#define NR_FILES_SHIFT 0 +#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */ +#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT) +#define ERROR_SIZE 32 /* 32 bits for the error mask */ +#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT) +#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT) +#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE) +#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT) + +struct fsif_chmod_request { + uint32_t fd; + int32_t mode; +}; + +struct fsif_space_request { + grant_ref_t gref; +}; + +struct fsif_sync_request { + uint32_t fd; +}; + + +/* FS operation request */ +struct fsif_request { + uint8_t type; /* Type of the request */ + uint8_t pad; + uint16_t id; /* Request ID, copied to the response */ + uint32_t pad2; + union { + struct fsif_open_request fopen; + struct fsif_close_request fclose; + struct fsif_read_request fread; + struct fsif_write_request fwrite; + struct fsif_stat_request fstat; + struct fsif_truncate_request ftruncate; + struct fsif_remove_request fremove; + struct fsif_rename_request frename; + struct fsif_create_request fcreate; + struct fsif_list_request flist; + struct fsif_chmod_request fchmod; + struct fsif_space_request fspace; + struct fsif_sync_request fsync; + } u; +}; +typedef struct fsif_request fsif_request_t; + +/* FS operation response */ +struct fsif_response { + uint16_t id; + uint16_t pad1; + uint32_t pad2; + union { + uint64_t ret_val; + struct fsif_stat_response fstat; + } u; +}; + +typedef struct fsif_response fsif_response_t; + +#define FSIF_RING_ENTRY_SIZE 64 + +#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \ + sizeof(grant_ref_t) + 1) +#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \ + sizeof(grant_ref_t) + 1) + +DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response); + +#define STATE_INITIALISED "init" +#define STATE_READY "ready" +#define STATE_CLOSING "closing" +#define STATE_CLOSED "closed" + + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/kbdif.h xen-4.9.2/extras/mini-os/include/xen/io/kbdif.h --- xen-4.9.0/extras/mini-os/include/xen/io/kbdif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/kbdif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,132 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* Pointer movement event */ +#define XENKBD_TYPE_MOTION 1 +/* Event type 2 currently not used */ +/* Key event (includes pointer buttons) */ +#define XENKBD_TYPE_KEY 3 +/* + * Pointer position event + * Capable backend sets feature-abs-pointer in xenstore. + * Frontend requests it instead of XENKBD_TYPE_MOTION by setting + * request-abs-update in xenstore. + */ +#define XENKBD_TYPE_POS 4 + +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined.
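+ *
+ * In events, by contrast, are consumed by the frontend from the shared
+ * page and dispatched on their type field; unknown types are silently
+ * skipped. An illustrative sketch (the handle_* helpers are
+ * hypothetical, not part of this interface):
+ *
+ *   void dispatch_in_event(const union xenkbd_in_event *ev)
+ *   {
+ *       switch (ev->type) {
+ *       case XENKBD_TYPE_MOTION: handle_motion(&ev->motion); break;
+ *       case XENKBD_TYPE_KEY:    handle_key(&ev->key);       break;
+ *       case XENKBD_TYPE_POS:    handle_pos(&ev->pos);       break;
+ *       default:                 break;
+ *       }
+ *   }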
+ */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/libxenvchan.h xen-4.9.2/extras/mini-os/include/xen/io/libxenvchan.h --- xen-4.9.0/extras/mini-os/include/xen/io/libxenvchan.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/libxenvchan.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,97 @@ +/** + * @file + * @section AUTHORS + * + * Copyright (C) 2010 Rafal Wojtczuk + * + * Authors: + * Rafal Wojtczuk + * Daniel De Graaf + * + * @section LICENSE + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * @section DESCRIPTION + * + * Originally borrowed from the Qubes OS Project, http://www.qubes-os.org, + * this code has been substantially rewritten to use the gntdev and gntalloc + * devices instead of raw MFNs and map_foreign_range. + * + * This is a library for inter-domain communication. A standard Xen ring + * buffer is used, with a datagram-based interface built on top. The grant + * reference and event channels are shared in XenStore under a user-specified + * path. + * + * The ring.h macros define an asymmetric interface to a shared data structure + * that assumes all rings reside in a single contiguous memory space. This is + * not suitable for vchan because the interface to the ring is symmetric except + * for the setup. Unlike the producer-consumer rings defined in ring.h, the + * sizes of the rings used in vchan are determined at execution time instead of + * compile time, so the macros in ring.h cannot be used to access the rings.
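+ *
+ * An illustrative sketch (not part of this header; map_grants() is a
+ * hypothetical helper, and offset handling into the shared grant list
+ * is elided): locating one ring at run time from its order, following
+ * the left_order/right_order semantics documented in struct
+ * vchan_interface below. Orders 10 and 11 place the ring inside the
+ * shared page itself, at the offset equal to its size; orders 12 and
+ * up use 2^(order-12) separately granted pages:
+ *
+ *   static void *ring_at(struct vchan_interface *vi, uint16_t order,
+ *                        const uint32_t *grants)
+ *   {
+ *       if (order == 10 || order == 11)
+ *           return (char *)vi + (1u << order);
+ *       return map_grants(grants, 1u << (order - 12));
+ *   }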
+ */ + +#include <stdint.h> +#include <sys/types.h> + +struct ring_shared { + uint32_t cons, prod; +}; + +#define VCHAN_NOTIFY_WRITE 0x1 +#define VCHAN_NOTIFY_READ 0x2 + +/** + * vchan_interface: primary shared data structure + */ +struct vchan_interface { + /** + * Standard consumer/producer interface, one pair per buffer + * left is client write, server read + * right is client read, server write + */ + struct ring_shared left, right; + /** + * size of the rings, which determines their location + * 10 - at offset 1024 in ring's page + * 11 - at offset 2048 in ring's page + * 12+ - uses 2^(N-12) grants to describe the multi-page ring + * These should remain constant once the page is shared. + * Only one of the two orders can be 10 (or 11). + */ + uint16_t left_order, right_order; + /** + * Shutdown detection: + * 0: client (or server) has exited + * 1: client (or server) is connected + * 2: client has not yet connected + */ + uint8_t cli_live, srv_live; + /** + * Notification bits: + * VCHAN_NOTIFY_WRITE: send notify when data is written + * VCHAN_NOTIFY_READ: send notify when data is read (consumed) + * cli_notify is used for the client to inform the server of its action + */ + uint8_t cli_notify, srv_notify; + /** + * Grant list: ordering is left, right. Must not extend into actual ring + * or grow beyond the end of the initial shared page. + * These should remain constant once the page is shared, to allow + * for possible remapping by a client that restarts. + */ + uint32_t grants[0]; +}; + diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/netif.h xen-4.9.2/extras/mini-os/include/xen/io/netif.h --- xen-4.9.0/extras/mini-os/include/xen/io/netif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/netif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,305 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Older implementations of the Xen network frontend / backend have an + * implicit dependency on MAX_SKB_FRAGS as the maximum number of + * ring slots an skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS.
+ * + * A better approach is to add a mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * number of slots a backend must support. + * + * The minimum value derives from the older Linux kernel's MAX_SKB_FRAGS + * (18), which has been proven to work with most frontends. Any new backend + * which doesn't negotiate with the frontend should expect the frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, the frontend should allocate two event + * channels for TX and RX, and advertise them to the backend as + * "event-channel-tx" and "event-channel-rx" respectively. If the frontend + * doesn't want to use this feature, it just writes the "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue to which those keys belong. Queues + * are indexed from zero.
For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>" + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) +#define _XEN_PCIB_AERHANDLER (1) +#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) +#define _XEN_PCIB_active (2) +#define XEN_PCIB_active (1<<_XEN_PCIB_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_aer_detected (6) +#define XEN_PCI_OP_aer_resume (7) +#define XEN_PCI_OP_aer_mmio (8) +#define XEN_PCI_OP_aer_slotreset (9) +#define XEN_PCI_OP_enable_multi_msi (10) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +/* + * It should be (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct msix_entry). + * Should not exceed 128. + */ +#define SH_INFO_MAX_VEC 128 + +struct xen_msix_entry { + uint16_t vector; + uint16_t entry; +}; +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; + /* IN: Contains extra info for this operation */ + uint32_t info; + /* IN: param for msi-x */ + struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; +}; + +/* used for PCIe AER handling */ +struct xen_pcie_aer_op +{ + + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + /* IN/OUT: return aer_op result or carry error_detected state as input */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment*/ + uint32_t bus; + uint32_t devfn; +}; +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; + struct xen_pcie_aer_op aer_op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/protocols.h xen-4.9.2/extras/mini-os/include/xen/io/protocols.h --- xen-4.9.0/extras/mini-os/include/xen/io/protocols.h 1970-01-01 00:00:00.000000000 +0000 +++
xen-4.9.2/extras/mini-os/include/xen/io/protocols.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,40 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/ring.h xen-4.9.2/extras/mini-os/include/xen/io/ring.h --- xen-4.9.0/extras/mini-os/include/xen/io/ring.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/ring.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,312 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. 
+ */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). 
To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? 
req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. 
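+ *
+ * An illustrative back-end service loop built from these macros (a
+ * sketch, not part of this header; process_request() and
+ * notify_via_evtchn() are hypothetical, mytag is a stand-in ring name,
+ * and process_request() is assumed to queue its response and advance
+ * rsp_prod_pvt). The final check arms req_event before sleeping, so a
+ * quiet ring never loses a wakeup:
+ *
+ *   for (;;) {
+ *       int work_to_do, notify;
+ *       RING_FINAL_CHECK_FOR_REQUESTS(&back_ring, work_to_do);
+ *       if (!work_to_do)
+ *           break;
+ *       while (RING_HAS_UNCONSUMED_REQUESTS(&back_ring)) {
+ *           mytag_request_t *req =
+ *               RING_GET_REQUEST(&back_ring, back_ring.req_cons++);
+ *           process_request(req);
+ *       }
+ *       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&back_ring, notify);
+ *       if (notify)
+ *           notify_via_evtchn();
+ *   }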
+ */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/tpmif.h xen-4.9.2/extras/mini-os/include/xen/io/tpmif.h --- xen-4.9.0/extras/mini-os/include/xen/io/tpmif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/tpmif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,143 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from tools/libxc/xen/io/netif.h + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +#include "../grant_table.h" + +struct tpmif_tx_request { + unsigned long addr; /* Machine address of packet. */ + grant_ref_t ref; /* grant table access reference */ + uint16_t unused; + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct tpmif_tx_request tpmif_tx_request_t; + +/* + * The TPMIF_TX_RING_SIZE defines the number of pages the + * front-end and backend can exchange (= size of array). + */ +typedef uint32_t TPMIF_RING_IDX; + +#define TPMIF_TX_RING_SIZE 1 + +/* This structure must fit in a memory page. */ + +struct tpmif_ring { + struct tpmif_tx_request req; +}; +typedef struct tpmif_ring tpmif_ring_t; + +struct tpmif_tx_interface { + struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; +}; +typedef struct tpmif_tx_interface tpmif_tx_interface_t; + +/****************************************************************************** + * TPM I/O interface for Xen guest OSes, v2 + * + * Author: Daniel De Graaf + * + * This protocol emulates the request/response behavior of a TPM using a Xen + * shared memory interface. All interaction with the TPM is at the direction + * of the frontend, since a TPM (hardware or virtual) is a passive device - + * the backend only processes commands as requested by the frontend. + * + * The frontend sends a request to the TPM by populating the shared page with + * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending + * an event channel notification. When the backend is finished, it will set + * the state to TPMIF_STATE_FINISH and send an event channel notification. + * + * In order to allow long-running commands to be canceled, the frontend can + * at any time change the state to TPMIF_STATE_CANCEL and send a notification. + * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH) + * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM + * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH + * if another reason for cancellation is required - for example, a physical + * TPM may cancel a command if the interface is seized by another locality. + * + * The TPM command format is defined by the TCG, and is available at + * http://www.trustedcomputinggroup.org/resources/tpm_main_specification + */ + +enum tpmif_state { + TPMIF_STATE_IDLE, /* no contents / vTPM idle / cancel complete */ + TPMIF_STATE_SUBMIT, /* request ready / vTPM working */ + TPMIF_STATE_FINISH, /* response ready / vTPM idle */ + TPMIF_STATE_CANCEL, /* cancel requested / vTPM working */ +}; +/* Note: The backend should only change state to IDLE or FINISH, while the + * frontend should only change to SUBMIT or CANCEL. Status changes do not need + * to use atomic operations. + */ + + +/* The shared page for vTPM request/response packets looks like: + * + * Offset Contents + * ================================================= + * 0 struct tpmif_shared_page + * 16 [optional] List of grant IDs + * 16+4*nr_extra_pages TPM packet data + * + * If the TPM packet data extends beyond the end of a single page, the grant IDs + * defined in extra_pages are used as if they were mapped immediately following + * the primary shared page.
The grants are allocated by the frontend and mapped + * by the backend. Before sending a request spanning multiple pages, the + * frontend should verify that the TPM supports such large requests by querying + * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM. + */ +struct tpmif_shared_page { + uint32_t length; /* request/response length in bytes */ + + uint8_t state; /* enum tpmif_state */ + uint8_t locality; /* for the current request */ + uint8_t pad; /* should be zero */ + + uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ + uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */ +}; +typedef struct tpmif_shared_page tpmif_shared_page_t; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/usbif.h xen-4.9.2/extras/mini-os/include/xen/io/usbif.h --- xen-4.9.0/extras/mini-os/include/xen/io/usbif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/usbif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,150 @@ +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +enum usb_spec_version { + USB_VER_UNKNOWN = 0, + USB_VER_USB11, + USB_VER_USB20, + USB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in usbif_request + * + * bits 0-5 are specific bits for virtual USB driver. + * bits 7-31 are standard urb pipe. + * + * - port number(NEW): bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag(NEW): bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ +#define usbif_pipeportnum(pipe) ((pipe) & 0x1f) +#define usbif_setportnum_pipe(pipe, portnum) \ + ((pipe)|(portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & 0x20) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20)) + +#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) + +/* + * RING for transferring urbs. 
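
A worked encoding of the pipe layout documented above, with purely
illustrative values — a bulk OUT transfer to device address 3, endpoint 2,
routed through port 1:

    uint32_t pipe = 0;

    pipe |= 3u << 30;                      /* pipe type 11 = bulk */
    pipe |= 3 << 8;                        /* device address, bits 8-14 */
    pipe |= 2 << 15;                       /* endpoint, bits 15-18 */
    /* direction bit 7 stays 0: Host-to-Device (Out) */
    pipe = usbif_setportnum_pipe(pipe, 1); /* port number, bits 0-4 */

    /* The matching unlink request reuses the pipe with bit 5 set: */
    uint32_t unlink_pipe = usbif_setunlink_pipe(pipe);
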
+ */ +struct usbif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct usbif_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct usbif_urb_request usbif_urb_request_t; + +struct usbif_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; +typedef struct usbif_urb_response usbif_urb_response_t; + +DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct usbif_conn_request { + uint16_t id; +}; +typedef struct usbif_conn_request usbif_conn_request_t; + +struct usbif_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +}; +typedef struct usbif_conn_response usbif_conn_response_t; + +DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/vscsiif.h xen-4.9.2/extras/mini-os/include/xen/io/vscsiif.h --- xen-4.9.0/extras/mini-os/include/xen/io/vscsiif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/vscsiif.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,260 @@ +/****************************************************************************** + * vscsiif.h + * + * Based on the blkif.h code. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright(c) FUJITSU Limited 2008. + */ + +#ifndef __XEN__PUBLIC_IO_SCSI_H__ +#define __XEN__PUBLIC_IO_SCSI_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * p-devname + * Values: string + * + * A free string used to identify the physical device (e.g. a disk name). + * + * p-dev + * Values: string + * + * A string specifying the backend device: either a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers), or a WWN (e.g. + * "naa.60014054ac780582"). + * + * v-dev + * Values: string + * + * A string specifying the frontend device in form of a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers). + * + *--------------------------------- Features --------------------------------- + * + * feature-sg-grant + * Values: unsigned [VSCSIIF_SG_TABLESIZE...65535] + * Default Value: 0 + * + * Specifies the maximum number of scatter/gather elements in grant pages + * supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE + * SG elements specified directly in the request. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + */ + +/* Requests from the frontend to the backend */ + +/* + * Request a SCSI operation specified via a CDB in vscsiif_request.cmnd. + * The target is specified via channel, id and lun. + * + * The operation to be performed is specified via a CDB in cmnd[], the length + * of the CDB is in cmd_len. sc_data_direction specifies the direction of data + * (to the device, from the device, or none at all). 
+ *
+ * If data is to be transferred to or from the device, the buffer(s) in the
+ * guest memory is/are specified via one or multiple scsiif_request_segment
+ * descriptors, each specifying a memory page via a grant_ref_t, an offset into
+ * the page and the length of the area in that page. All scsiif_request_segment
+ * areas concatenated form the resulting data buffer used by the operation.
+ * If the number of scsiif_request_segment areas is not too large (less than
+ * or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
+ * seg[] array and the number of valid scsiif_request_segment elements is to be
+ * set in nr_segments.
+ *
+ * If "feature-sg-grant" in the Xenstore is set it is possible to specify more
+ * than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
+ * The maximum number of allowed scsiif_request_segment elements is the value
+ * of the "feature-sg-grant" entry from Xenstore. When using indirection the
+ * seg[] array doesn't contain specifications of the data buffers, but
+ * references to scsiif_request_segment arrays, which in turn reference the
+ * data buffers. While nr_segments holds the number of populated seg[] entries
+ * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
+ * elements referencing the target data buffers is calculated from the lengths
+ * of the seg[] elements (the sum of all valid seg[].length divided by the
+ * size of one scsiif_request_segment structure). The frontend may use a mix of
+ * direct and indirect requests.
+ */
+#define VSCSIIF_ACT_SCSI_CDB 1
+
+/*
+ * Request abort of a running operation for the specified target given by
+ * channel, id, lun and the operation's rqid in ref_rqid.
+ */
+#define VSCSIIF_ACT_SCSI_ABORT 2
+
+/*
+ * Request a device reset of the specified target (channel and id).
+ */
+#define VSCSIIF_ACT_SCSI_RESET 3
+
+/*
+ * Preset scatter/gather elements for a following request. Deprecated.
+ * Keeping the define only to avoid usage of the value "4" for other actions.
+ */
+#define VSCSIIF_ACT_SCSI_SG_PRESET 4
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter/gather
+ * elements needed, we decided to use 26 as a magic number.
+ *
+ * If "feature-sg-grant" is set, more scatter/gather elements can be specified
+ * by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
+ * In this case the vscsiif_request seg elements don't contain references to
+ * the user data, but to the SG elements referencing the user data.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18, still valid
+ *
+ * Changing these values requires support of multiple protocols via the rings
+ * as "old clients" will blindly use these values and the resulting structure
+ * sizes.
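
To make the indirect ("feature-sg-grant") accounting above concrete, a worked
example (not normative; struct vscsiif_request and VSCSIIF_SG_GRANT are
defined just below): struct scsiif_request_segment occupies 8 bytes — a
4-byte grant_ref_t plus two uint16_t fields — so one granted 4096-byte page
holds 512 SG elements.

    /* req is a struct vscsiif_request prepared elsewhere. */
    req.nr_segments = VSCSIIF_SG_GRANT | 2;  /* two indirection pages */
    req.seg[0].length = 4096;                /* page full of SG elements */
    req.seg[1].length = 4096;

    /* SG elements the backend will walk for the data buffer:
     *   (4096 + 4096) / sizeof(struct scsiif_request_segment) = 1024 */
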
+ */ +#define VSCSIIF_MAX_COMMAND_SIZE 16 +#define VSCSIIF_SENSE_BUFFERSIZE 96 + +struct scsiif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; +typedef struct scsiif_request_segment vscsiif_segment_t; + +#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment)) + +/* Size of one request is 252 bytes */ +struct vscsiif_request { + uint16_t rqid; /* private guest value, echoed in resp */ + uint8_t act; /* command between backend and frontend */ + uint8_t cmd_len; /* valid CDB bytes */ + + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */ + uint16_t timeout_per_command; /* deprecated: timeout in secs, 0=default */ + uint16_t channel, id, lun; /* (virtual) device specification */ + uint16_t ref_rqid; /* command abort reference */ + uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) + DMA_FROM_DEVICE(2) + DMA_NONE(3) requests */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ +/* + * flag in nr_segments: SG elements via grant page + * + * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number + * of grant pages containing SG elements. Usable if "feature-sg-grant" set. + */ +#define VSCSIIF_SG_GRANT 0x80 + + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE]; + uint32_t reserved[3]; +}; +typedef struct vscsiif_request vscsiif_request_t; + +/* + * The following interface is deprecated! + */ +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \ + / sizeof(vscsiif_segment_t)) + +struct vscsiif_sg_list { + /* First two fields must match struct vscsiif_request! */ + uint16_t rqid; /* private guest value, must match main req */ + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE]; +}; +typedef struct vscsiif_sg_list vscsiif_sg_list_t; +/* End of deprecated interface */ + +/* Size of one response is 252 bytes */ +struct vscsiif_response { + uint16_t rqid; /* identifies request */ + uint8_t act; /* deprecated: valid only if SG_PRESET supported */ + uint8_t sense_len; + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; + int32_t rslt; + uint32_t residual_len; /* request bufflen - + return the value from physical device */ + uint32_t reserved[36]; +}; +typedef struct vscsiif_response vscsiif_response_t; + +DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); + + +#endif /*__XEN__PUBLIC_IO_SCSI_H__*/ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/xenbus.h xen-4.9.2/extras/mini-os/include/xen/io/xenbus.h --- xen-4.9.0/extras/mini-os/include/xen/io/xenbus.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/xenbus.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,80 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/io/xs_wire.h xen-4.9.2/extras/mini-os/include/xen/io/xs_wire.h --- xen-4.9.0/extras/mini-os/include/xen/io/xs_wire.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/io/xs_wire.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,149 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES, + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#ifdef EINVAL +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) +}; +#endif + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; + uint32_t server_features; /* Bitmap of features supported by the server */ + uint32_t connection; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. 
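
For illustration, the wire image of a minimal XS_READ request is just a
struct xsd_sockmsg header followed by the nul-terminated path; len counts
only the payload bytes. A sketch (the copy into req[] and the producer-index
update are elided):

    const char path[] = "domid";      /* example payload */
    struct xsd_sockmsg hdr = {
        .type   = XS_READ,
        .req_id = 1,                  /* echoed back in the reply */
        .tx_id  = 0,                  /* not part of a transaction */
        .len    = sizeof(path),       /* payload length, incl. the nul */
    };
    /* Transmit: the hdr bytes, then the path bytes. */
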
*/ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + +/* The ability to reconnect a ring */ +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 + +/* Valid values for the connection field */ +#define XENSTORE_CONNECTED 0 /* the steady-state */ +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */ + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/kexec.h xen-4.9.2/extras/mini-os/include/xen/kexec.h --- xen-4.9.0/extras/mini-os/include/xen/kexec.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/kexec.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,249 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Xen port written by: + * - Simon 'Horms' Horman + * - Magnus Damm + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. + * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. 
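
Operation type 3 reduces to a single call. A sketch, assuming a kexec_op()
wrapper matching the prototype documented below and using the constants and
types defined further down in this header:

    xen_kexec_exec_t exec = { .type = KEXEC_TYPE_DEFAULT };

    /* Transfer control to the previously loaded image;
     * does not return on success. */
    kexec_op(KEXEC_CMD_kexec, &exec);
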
+ */ + +#include "xen.h" + +#if defined(__i386__) || defined(__x86_64__) +#define KEXEC_XEN_NO_PAGES 17 +#endif + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, void *args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Kexec supports two types of operation: + * - kexec into a regular kernel, very similar to a standard reboot + * - KEXEC_TYPE_DEFAULT is used to specify this type + * - kexec into a special "crash kernel", aka kexec-on-panic + * - KEXEC_TYPE_CRASH is used to specify this type + * - parts of our system may be broken at kexec-on-panic time + * - the code should be kept as simple and self-contained as possible + */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + + +/* The kexec implementation for Xen allows the user to load two + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. + * All data needed for a kexec reboot is kept in one xen_kexec_image_t + * per "instance". The data mainly consists of machine address lists to pages + * together with destination addresses. The data in xen_kexec_image_t + * is passed to the "code page" which is one page of code that performs + * the final relocations before jumping to the new kernel. + */ + +typedef struct xen_kexec_image { +#if defined(__i386__) || defined(__x86_64__) + unsigned long page_list[KEXEC_XEN_NO_PAGES]; +#endif + unsigned long indirection_page; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * + * Control is transferred to the image entry point with the host in + * the following state. + * + * - The image may be executed on any PCPU and all other PCPUs are + * stopped. + * + * - Local interrupts are disabled. + * + * - Register values are undefined. + * + * - The image segments have writeable 1:1 virtual to machine + * mappings. The location of any page tables is undefined and these + * page table frames are not be mapped. + */ +#define KEXEC_CMD_kexec 0 +typedef struct xen_kexec_exec { + int type; +} xen_kexec_exec_t; + +/* + * Load/Unload kernel image for kexec or kdump. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * image == relocation information for kexec (ignored for unload) [in] + */ +#define KEXEC_CMD_kexec_load_v1 1 /* obsolete since 0x00040400 */ +#define KEXEC_CMD_kexec_unload_v1 2 /* obsolete since 0x00040400 */ +typedef struct xen_kexec_load_v1 { + int type; + xen_kexec_image_t image; +} xen_kexec_load_v1_t; + +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ +#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap + * Note that although this is adjacent + * to Xen it exists in a separate EFI + * region on ia64, and thus needs to be + * inserted into iomem_machine separately */ +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of + * the ia64_boot_param */ +#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of + * of the EFI Memory Map */ +#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ + +/* + * Find the address and size of certain memory areas + * range == KEXEC_RANGE_... 
[in] + * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] + * size == number of bytes reserved in window [out] + * start == address of the first byte in the window [out] + */ +#define KEXEC_CMD_kexec_get_range 3 +typedef struct xen_kexec_range { + int range; + int nr; + unsigned long size; + unsigned long start; +} xen_kexec_range_t; + +#if __XEN_INTERFACE_VERSION__ >= 0x00040400 +/* + * A contiguous chunk of a kexec image and it's destination machine + * address. + */ +typedef struct xen_kexec_segment { + union { + XEN_GUEST_HANDLE(const_void) h; + uint64_t _pad; + } buf; + uint64_t buf_size; + uint64_t dest_maddr; + uint64_t dest_size; +} xen_kexec_segment_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_segment_t); + +/* + * Load a kexec image into memory. + * + * For KEXEC_TYPE_DEFAULT images, the segments may be anywhere in RAM. + * The image is relocated prior to being executed. + * + * For KEXEC_TYPE_CRASH images, each segment of the image must reside + * in the memory region reserved for kexec (KEXEC_RANGE_MA_CRASH) and + * the entry point must be within the image. The caller is responsible + * for ensuring that multiple images do not overlap. + * + * All image segments will be loaded to their destination machine + * addresses prior to being executed. The trailing portion of any + * segments with a source buffer (from dest_maddr + buf_size to + * dest_maddr + dest_size) will be zeroed. + * + * Segments with no source buffer will be accessible to the image when + * it is executed. + */ + +#define KEXEC_CMD_kexec_load 4 +typedef struct xen_kexec_load { + uint8_t type; /* One of KEXEC_TYPE_* */ + uint8_t _pad; + uint16_t arch; /* ELF machine type (EM_*). */ + uint32_t nr_segments; + union { + XEN_GUEST_HANDLE(xen_kexec_segment_t) h; + uint64_t _pad; + } segments; + uint64_t entry_maddr; /* image entry point machine address. */ +} xen_kexec_load_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_load_t); + +/* + * Unload a kexec image. + * + * Type must be one of KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH. + */ +#define KEXEC_CMD_kexec_unload 5 +typedef struct xen_kexec_unload { + uint8_t type; +} xen_kexec_unload_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_unload_t); + +#else /* __XEN_INTERFACE_VERSION__ < 0x00040400 */ + +#define KEXEC_CMD_kexec_load KEXEC_CMD_kexec_load_v1 +#define KEXEC_CMD_kexec_unload KEXEC_CMD_kexec_unload_v1 +#define xen_kexec_load xen_kexec_load_v1 +#define xen_kexec_load_t xen_kexec_load_v1_t + +#endif + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/mem_event.h xen-4.9.2/extras/mini-os/include/xen/mem_event.h --- xen-4.9.0/extras/mini-os/include/xen/mem_event.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/mem_event.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,134 @@ +/****************************************************************************** + * mem_event.h + * + * Memory event common structures. + * + * Copyright (c) 2009 by Citrix Systems, Inc. 
(Patrick Colp) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_MEM_EVENT_H +#define _XEN_PUBLIC_MEM_EVENT_H + +#include "xen.h" +#include "io/ring.h" + +/* Memory event flags */ +#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) +#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1) +#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) +#define MEM_EVENT_FLAG_FOREIGN (1 << 3) +#define MEM_EVENT_FLAG_DUMMY (1 << 4) +/* + * Emulate the fault-causing instruction (if set in the event response flags). + * This will allow the guest to continue execution without lifting the page + * access restrictions. + */ +#define MEM_EVENT_FLAG_EMULATE (1 << 5) +/* + * Same as MEM_EVENT_FLAG_EMULATE, but with write operations or operations + * potentially having side effects (like memory mapped or port I/O) disabled. + */ +#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6) + +/* Reasons for the memory event request */ +#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */ +#define MEM_EVENT_REASON_VIOLATION 1 /* access violation, GFN is address */ +#define MEM_EVENT_REASON_CR0 2 /* CR0 was hit: gfn is new CR0 value, gla is previous */ +#define MEM_EVENT_REASON_CR3 3 /* CR3 was hit: gfn is new CR3 value, gla is previous */ +#define MEM_EVENT_REASON_CR4 4 /* CR4 was hit: gfn is new CR4 value, gla is previous */ +#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ +#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ +#define MEM_EVENT_REASON_MSR 7 /* MSR was hit: gfn is MSR value, gla is MSR address; + does NOT honour HVMPME_onchangeonly */ + +/* Using a custom struct (not hvm_hw_cpu) so as to not fill + * the mem_event ring buffer too quickly. 
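
A sketch of a listener's response path for the flags above, using the
request/response types defined just below (ring plumbing and the helper
names are illustrative):

    mem_event_response_t rsp = {
        .vcpu_id = req.vcpu_id,
        .gfn     = req.gfn,
        /* Hand VCPU_PAUSED back so the vCPU gets unpaused. */
        .flags   = req.flags & MEM_EVENT_FLAG_VCPU_PAUSED,
    };

    /* Alternatively, ask Xen to emulate the faulting instruction rather
     * than lift the page's access restrictions: */
    rsp.flags |= MEM_EVENT_FLAG_EMULATE;
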
*/ +struct mem_event_regs_x86 { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rflags; + uint64_t dr7; + uint64_t rip; + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t msr_efer; + uint64_t msr_star; + uint64_t msr_lstar; + uint64_t fs_base; + uint64_t gs_base; + uint32_t cs_arbytes; + uint32_t _pad; +}; + +typedef struct mem_event_st { + uint32_t flags; + uint32_t vcpu_id; + + uint64_t gfn; + uint64_t offset; + uint64_t gla; /* if gla_valid */ + + uint32_t p2mt; + + uint16_t access_r:1; + uint16_t access_w:1; + uint16_t access_x:1; + uint16_t gla_valid:1; + uint16_t fault_with_gla:1; + uint16_t fault_in_gpt:1; + uint16_t available:10; + + uint16_t reason; + struct mem_event_regs_x86 x86_regs; +} mem_event_request_t, mem_event_response_t; + +DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t); + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/memory.h xen-4.9.2/extras/mini-os/include/xen/memory.h --- xen-4.9.0/extras/mini-os/include/xen/memory.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/memory.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,587 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +#include "xen.h" + +/* + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. 
This field is not used by + * XENMEM_decrease_reservation. + */ +#define XENMEMF_address_bits(x) (x) +#define XENMEMF_get_address_bits(x) ((x) & 0xffu) +/* NUMA node to allocate from. */ +#define XENMEMF_node(x) (((x) + 1) << 8) +#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) +#endif + +struct xen_memory_reservation { + + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + * XENMEM_claim_pages: + * IN: must be zero + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + xen_ulong_t nr_extents; + unsigned int extent_order; + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 + /* XENMEMF flags. */ + unsigned int mem_flags; +#else + unsigned int address_bits; +#endif + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); + +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; +}; +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); + +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. 
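
Returning to struct xen_memory_reservation above — a minimal ballooning-style
sketch, assuming the conventional HYPERVISOR_memory_op(cmd, arg) guest
wrapper (DOMID_SELF and set_xen_guest_handle() come from the core public
headers):

    xen_pfn_t frames[16];             /* GMFNs of the extents to free */
    struct xen_memory_reservation r = {
        .nr_extents   = 16,
        .extent_order = 0,            /* single 4 KiB pages */
        .domid        = DOMID_SELF,
    };

    set_xen_guest_handle(r.extent_start, frames);
    /* Returns the number of extents actually freed. */
    long freed = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &r);
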
+ */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. + */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than max_e * 2MB. + */ + unsigned int nr_extents; +}; +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); + +/* + * For a compat caller, this is identical to XENMEM_machphys_mfn_list. + * + * For a non compat caller, this functions similarly to + * XENMEM_machphys_mfn_list, but returns the mfns making up the compatibility + * m2p table. + */ +#define XENMEM_machphys_compat_mfn_list 25 + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); + +/* Source mapping space. */ +/* ` enum phys_map_space { */ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, + * XENMEM_add_to_physmap_batch only. */ +/* ` } */ + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Number of pages to go through for gmfn_range */ + uint16_t size; + + unsigned int space; /* => enum phys_map_space */ + +#define XENMAPIDX_grant_table_status 0x80000000 + + /* Index into space being mapped. */ + xen_ulong_t idx; + + /* GPFN in domid where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); + +/* A batched version of add_to_physmap. 
*/ +#define XENMEM_add_to_physmap_batch 23 +struct xen_add_to_physmap_batch { + /* IN */ + /* Which domain to change the mapping for. */ + domid_t domid; + uint16_t space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + uint16_t size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + XEN_GUEST_HANDLE(xen_ulong_t) idxs; + + /* GPFN in domid where the source mapping page should appear. */ + XEN_GUEST_HANDLE(xen_pfn_t) gpfns; + + /* OUT */ + + /* Per index error code. */ + XEN_GUEST_HANDLE(int) errs; +}; +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_batch_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_batch_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define XENMEM_add_to_physmap_range XENMEM_add_to_physmap_batch +#define xen_add_to_physmap_range xen_add_to_physmap_batch +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_range_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t); +#endif + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. 
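
An illustrative fetch of the pseudo-physical map via struct xen_memory_map
above. The E820-style entry type and the buffer size are caller-side
assumptions, not part of this header:

    struct e820entry entries[32];     /* assumed E820-format entry type */
    struct xen_memory_map memmap = { .nr_entries = 32 };

    set_xen_guest_handle(memmap.buffer, entries);
    if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap) == 0) {
        /* memmap.nr_entries now holds the count actually written. */
    }
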
+ */ +#define XENMEM_get_sharing_freed_pages 18 +#define XENMEM_get_sharing_shared_pages 19 + +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_aligned_t buffer; + /* Other OPs */ + uint64_aligned_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 +#define XENMEM_access_op_set_access 1 +#define XENMEM_access_op_get_access 2 + +typedef enum { + XENMEM_access_n, + XENMEM_access_r, + XENMEM_access_w, + XENMEM_access_rw, + XENMEM_access_x, + XENMEM_access_rx, + XENMEM_access_wx, + XENMEM_access_rwx, + /* + * Page starts off as r-x, but automatically + * change to r-w on a write + */ + XENMEM_access_rx2rw, + /* + * Log access: starts off as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu + */ + XENMEM_access_n2rwx, + /* Take the domain default */ + XENMEM_access_default +} xenmem_access_t; + +struct xen_mem_access_op { + /* XENMEM_access_op_* */ + uint8_t op; + /* xenmem_access_t */ + uint8_t access; + domid_t domid; + /* + * Number of pages for set op + * Ignored on setting default access and other ops + */ + uint32_t nr; + /* + * First pfn for set op + * pfn for get op + * ~0ull is used to set and get the default access for pages + */ + uint64_aligned_t pfn; +}; +typedef struct xen_mem_access_op xen_mem_access_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 +#define XENMEM_sharing_op_audit 8 + +#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +/* The following allows sharing of grant refs. This is useful + * for sharing utilities sitting as "filters" in IO backends + * (e.g. memshr + blktap(2)). 
The IO backend is only exposed
+ * to grant references, and this allows sharing of the grefs */
+#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (1ULL << 62)
+
+#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val)  \
+    (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
+#define XENMEM_SHARING_OP_FIELD_IS_GREF(field)         \
+    ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)
+#define XENMEM_SHARING_OP_FIELD_GET_GREF(field)        \
+    ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG))
+
+struct xen_mem_sharing_op {
+    uint8_t op;     /* XENMEM_sharing_op_* */
+    domid_t domain;
+
+    union {
+        struct mem_sharing_op_nominate {  /* OP_NOMINATE_xxx           */
+            union {
+                uint64_aligned_t gfn;     /* IN: gfn to nominate       */
+                uint32_t      grant_ref;  /* IN: grant ref to nominate */
+            } u;
+            uint64_aligned_t  handle;     /* OUT: the handle           */
+        } nominate;
+        struct mem_sharing_op_share {     /* OP_SHARE/ADD_PHYSMAP */
+            uint64_aligned_t source_gfn;    /* IN: the gfn of the source page */
+            uint64_aligned_t source_handle; /* IN: handle to the source page */
+            uint64_aligned_t client_gfn;    /* IN: the client gfn */
+            uint64_aligned_t client_handle; /* IN: handle to the client page */
+            domid_t  client_domain;         /* IN: the client domain id */
+        } share;
+        struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
+            union {
+                uint64_aligned_t gfn;     /* IN: gfn to debug          */
+                uint64_aligned_t mfn;     /* IN: mfn to debug          */
+                uint32_t gref;            /* IN: gref to debug         */
+            } u;
+        } debug;
+    } u;
+};
+typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
+
+/*
+ * Attempt to stake a claim for a domain on a quantity of pages
+ * of system RAM, but _not_ assign specific pageframes. Only
+ * arithmetic is performed so the hypercall is very fast and need
+ * not be preemptible, thus sidestepping time-of-check-time-of-use
+ * races for memory allocation. Returns 0 if the hypervisor page
+ * allocator has atomically and successfully claimed the requested
+ * number of pages, else non-zero.
+ *
+ * Any domain may have only one active claim. When sufficient memory
+ * has been allocated to resolve the claim, the claim silently expires.
+ * Claiming zero pages effectively resets any outstanding claim and
+ * is always successful.
+ *
+ * Note that a valid claim may be staked even after memory has been
+ * allocated for a domain. In this case, the claim is not incremental,
+ * i.e. if the domain's tot_pages is 3, and a claim is staked for 10,
+ * only 7 additional pages are claimed.
+ *
+ * Caller must be privileged or the hypercall fails.
+ */
+#define XENMEM_claim_pages                  24
+
+/*
+ * XENMEM_claim_pages flags - there are no flags at this time.
+ * The zero value is appropriate.
+ */
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/*
+ * XENMEM_get_vnumainfo is used by a guest to get
+ * vNUMA topology from the hypervisor.
+ */
+#define XENMEM_get_vnumainfo                26
+
+/* vNUMA node memory ranges */
+struct xen_vmemrange {
+    uint64_t start, end;
+    unsigned int flags;
+    unsigned int nid;
+};
+typedef struct xen_vmemrange xen_vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t);
+
+/*
+ * vNUMA topology specifies the vNUMA node number, distance table,
+ * memory ranges and vcpu mapping provided for guests.
+ * The XENMEM_get_vnumainfo hypercall expects the guest to supply
+ * nr_vnodes, nr_vmemranges and nr_vcpus to indicate the available
+ * buffer space. After filling the guest structures, nr_vnodes,
+ * nr_vmemranges and nr_vcpus are copied back to the guest. The expected
+ * values of nr_vnodes, nr_vmemranges and nr_vcpus are returned to the
+ * guest if the supplied values were incorrect.
+ */ +struct xen_vnuma_topology_info { + /* IN */ + domid_t domid; + uint16_t pad; + /* IN/OUT */ + unsigned int nr_vnodes; + unsigned int nr_vcpus; + unsigned int nr_vmemranges; + /* OUT */ + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vdistance; + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vcpu_to_vnode; + union { + XEN_GUEST_HANDLE(xen_vmemrange_t) h; + uint64_t pad; + } vmemrange; +}; +typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); + +/* Next available subop number is 27 */ + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/nmi.h xen-4.9.2/extras/mini-os/include/xen/nmi.h --- xen-4.9.0/extras/mini-os/include/xen/nmi.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/nmi.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,85 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +#include "xen.h" + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) +#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */ + /* Parity error reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_parity_error 1 +#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) +#endif + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. 
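
Registration is a single call. A sketch, assuming an nmi_op() wrapper
matching the prototype above; XENNMI_register_callback is defined just
below:

    void nmi_handler(void);           /* illustrative entry point */

    struct xennmi_callback cb = {
        .handler_address = (unsigned long)nmi_handler,
    };
    nmi_op(XENNMI_register_callback, &cb);
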
+ */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +typedef struct xennmi_callback xennmi_callback_t; +DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/physdev.h xen-4.9.2/extras/mini-os/include/xen/physdev.h --- xen-4.9.0/extras/mini-os/include/xen/physdev.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/physdev.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,380 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. 
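The PHYSDEVOP_eoi operation above reduces to a one-line notification after servicing a passed-through interrupt. A hedged sketch, assuming a Linux/mini-os style HYPERVISOR_physdev_op(cmd, arg) wrapper:

/* Signal end-of-interrupt for a pirq; only required when the status
 * machinery reports XENIRQSTAT_needs_eoi (see below) for this IRQ. */
static void pirq_eoi(uint32_t irq)
{
    struct physdev_eoi eoi = { .irq = irq };

    HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
}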
+ */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. 
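As a usage sketch for the IOPL interface above (an illustration, not part of the diff; assumes the same HYPERVISOR_physdev_op() wrapper):

/* Raise the current VCPU's I/O privilege level so ring-3 code may use
 * in/out instructions; dom0 kernels do this for privileged userspace
 * drivers. */
static int raise_iopl(void)
{
    struct physdev_set_iopl set_iopl = { .iopl = 3 };

    return HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}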
+ */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 +#define MAP_PIRQ_TYPE_MULTI_MSI 0x4 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN (ignored for ..._MULTI_MSI) */ + int index; + /* IN or OUT */ + int pirq; + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */ + int bus; + /* IN */ + int devfn; + /* IN (also OUT for ..._MULTI_MSI) */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + +/* + * Argument to physdev_op_compat() hypercall. Superseded by the new physdev_op() + * hypercall since 0x00030202.
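To illustrate physdev_map_pirq above, a hedged dom0-style sketch for binding a legacy GSI; pirq == -1 asks Xen to allocate a free pirq, and the HYPERVISOR_physdev_op() wrapper is again assumed:

/* Map a GSI to a pirq owned by the calling domain. */
static int map_gsi_to_pirq(int gsi)
{
    struct physdev_map_pirq map = {
        .domid = DOMID_SELF,
        .type  = MAP_PIRQ_TYPE_GSI,
        .index = gsi,
        .pirq  = -1,              /* let Xen pick a free pirq */
    };
    int rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map);

    return rc ? rc : map.pirq;    /* OUT: the allocated pirq */
}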
+ */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce that MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + +#define PHYSDEVOP_DBGP_RESET_PREPARE 1 +#define PHYSDEVOP_DBGP_RESET_DONE 2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 +#define PHYSDEVOP_DBGP_BUS_PCI 1 + +#define PHYSDEVOP_dbgp_op 29 +struct physdev_dbgp_op { + /* IN */ + uint8_t op; + uint8_t bus; + union { + struct physdev_pci_device pci; + } u; +}; +typedef struct physdev_dbgp_op physdev_dbgp_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +#if __XEN_INTERFACE_VERSION__ < 0x00040600 +/* + * These all-capitals physdev operation names are superseded by the new names + * (defined above) since interface version 0x00030202. The guard above was + * added post-4.5 only though and hence shouldn't check for 0x00030202.
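For the PCI announcement operations just defined, a hedged sketch of how a dom0 kernel might report a hot-added device; segment/bus/devfn values come from the caller and the hypercall wrapper is assumed:

/* Announce a newly discovered PCI device to Xen so the hypervisor can
 * track it (e.g. for MSI handling); flags may carry XEN_PCI_DEV_*. */
static int announce_pci_device(uint16_t seg, uint8_t bus, uint8_t devfn)
{
    struct physdev_pci_device_add add = {
        .seg   = seg,
        .bus   = bus,
        .devfn = devfn,
        .flags = 0,
    };

    return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
}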
+ */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#endif + +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/platform.h xen-4.9.2/extras/mini-os/include/xen/platform.h --- xen-4.9.0/extras/mini-os/include/xen/platform.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/platform.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,606 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include "xen.h" + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system time was <system_time>. + */ +#define XENPF_settime 17 +struct xenpf_settime { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +typedef struct xenpf_settime xenpf_settime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables.
*/ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_add_memtype xenpf_add_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_del_memtype xenpf_del_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +typedef struct xenpf_read_memtype xenpf_read_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +typedef struct xenpf_microcode_update xenpf_microcode_update_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); + +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. 
+ */ + uint32_t misc; + xen_ulong_t status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + xen_ulong_t size; + XEN_GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + xen_ulong_t size; + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + +#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001 + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t max_capsule_size; + uint32_t reset_type; + } query_capsule_capabilities; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 +#define XEN_FW_EFI_PCI_ROM 5 +#define XEN_FW_KBD_SHIFT_FLAGS 5 +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. 
*/ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + struct { + /* IN variables */ + uint16_t segment; + uint8_t bus; + uint8_t devfn; + uint16_t vendor; + uint16_t devid; + /* OUT variables */ + uint64_t address; + xen_ulong_t size; + } pci_rom; + } efi_info; /* XEN_FW_EFI_INFO */ + + /* Int16, Fn02: Get keyboard shift flags. */ + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + uint16_t pm1a_cnt_val; /* PM1a control value. */ + uint16_t pm1b_cnt_val; /* PM1b control value. */ +#else + uint16_t val_a; /* PM1a control / sleep type A. */ + uint16_t val_b; /* PM1b control / sleep type B. */ +#endif + uint32_t sleep_state; /* Which state to enter (Sn). */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 + uint32_t flags; /* XENPF_ACPI_SLEEP_*. */ +}; +typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq 52 +struct xenpf_change_freq { + /* IN variables */ + uint32_t flags; /* Must be zero. */ + uint32_t cpu; /* Physical cpu. */ + uint64_t freq; /* New frequency (Hz). */ +}; +typedef struct xenpf_change_freq xenpf_change_freq_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. + */ +#define XENPF_getidletime 53 +struct xenpf_getidletime { + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + XEN_GUEST_HANDLE(uint8) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ + XEN_GUEST_HANDLE(uint64) idletime; + /* OUT variables */ + /* System time when the idletime snapshots were taken. 
*/ + uint64_t now; +}; +typedef struct xenpf_getidletime xenpf_getidletime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo 54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX 1 +#define XEN_PROCESSOR_PM_PX 2 +#define XEN_PROCESSOR_PM_TX 4 + +/* cmd type */ +#define XEN_PM_CX 0 +#define XEN_PM_PX 1 +#define XEN_PM_TX 2 +#define XEN_PM_PDC 3 + +/* Px sub info type */ +#define XEN_PX_PCT 1 +#define XEN_PX_PSS 2 +#define XEN_PX_PPC 4 +#define XEN_PX_PSD 8 + +struct xen_power_register { + uint32_t space_id; + uint32_t bit_width; + uint32_t bit_offset; + uint32_t access_size; + uint64_t address; +}; + +struct xen_processor_csd { + uint32_t domain; /* domain number of one dependent group */ + uint32_t coord_type; /* coordination type */ + uint32_t num; /* number of processors in same domain */ +}; +typedef struct xen_processor_csd xen_processor_csd_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); + +struct xen_processor_cx { + struct xen_power_register reg; /* GAS for Cx trigger register */ + uint8_t type; /* cstate value, c0: 0, c1: 1, ... */ + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ + uint32_t power; /* average power consumption(mW) */ + uint32_t dpcnt; /* number of dependency entries */ + XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ +}; +typedef struct xen_processor_cx xen_processor_cx_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); + +struct xen_processor_flags { + uint32_t bm_control:1; + uint32_t bm_check:1; + uint32_t has_cst:1; + uint32_t power_setup_done:1; + uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { + uint32_t count; /* number of C state entries in array below */ + struct xen_processor_flags flags; /* global flags of this processor */ + XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ +}; + +struct xen_pct_register { + uint8_t descriptor; + uint16_t length; + uint8_t space_id; + uint8_t bit_width; + uint8_t bit_offset; + uint8_t reserved; + uint64_t address; +}; + +struct xen_processor_px { + uint64_t core_frequency; /* megahertz */ + uint64_t power; /* milliWatts */ + uint64_t transition_latency; /* microseconds */ + uint64_t bus_master_latency; /* microseconds */ + uint64_t control; /* control value */ + uint64_t status; /* success indicator */ +}; +typedef struct xen_processor_px xen_processor_px_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); + +struct xen_psd_package { + uint64_t num_entries; + uint64_t revision; + uint64_t domain; + uint64_t coord_type; + uint64_t num_processors; +}; + +struct xen_processor_performance { + uint32_t flags; /* flag for Px sub info type */ + uint32_t platform_limit; /* Platform limitation on freq usage */ + struct xen_pct_register control_register; + struct xen_pct_register status_register; + uint32_t state_count; /* total available performance states */ + XEN_GUEST_HANDLE(xen_processor_px_t) states; + struct xen_psd_package domain_info; + uint32_t shared_type; /* coordination type of this processor */ +}; +typedef struct xen_processor_performance xen_processor_performance_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); + +struct xenpf_set_processor_pminfo { + /* IN variables */ + uint32_t id; /* ACPI CPU ID */ + uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ + } u; +}; +typedef struct xenpf_set_processor_pminfo 
xenpf_set_processor_pminfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); + +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Corresponding xen_cpuid is not present */ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + +#define XENPF_get_cpu_version 48 +struct xenpf_pcpu_version { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; + char vendor_id[12]; + uint32_t family; + uint32_t model; + uint32_t stepping; +}; +typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol +{ + uint32_t cpuid; +}; +typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd +{ + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: the number of cpus expected to be idled (SET) */ + /* OUT variables: the number of cpus actually idled (GET) */ + uint32_t idle_nums; +}; +typedef struct xenpf_core_parking xenpf_core_parking_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); + +/* + * Access generic platform resources (e.g., MSR access, port I/O, etc.) + * in a unified way. Batched resource operations in one call are supported and + * they are always non-preemptible and executed in their original order. + * The batch itself returns a negative integer for general errors, or a + * non-negative integer for the number of successful operations. For the latter + * case, the @ret in the failed entry (if any) indicates the exact error.
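The batching contract above can be made concrete with a short sketch. This is an illustration only: it uses the xenpf_resource_entry/xenpf_resource_op layouts defined immediately below and assumes a Linux-style HYPERVISOR_platform_op() wrapper:

/* Read one MSR on a given physical CPU through XENPF_resource_op. */
static int read_msr_on_cpu(uint32_t cpu, uint32_t msr, uint64_t *val)
{
    struct xenpf_resource_entry entry = {
        .u.cmd = XEN_RESOURCE_OP_MSR_READ,
        .idx   = msr,
    };
    struct xen_platform_op op = {
        .cmd               = XENPF_resource_op,
        .interface_version = XENPF_INTERFACE_VERSION,
    };
    int rc;

    op.u.resource_op.nr_entries = 1;
    op.u.resource_op.cpu        = cpu;
    set_xen_guest_handle(op.u.resource_op.entries, &entry);

    rc = HYPERVISOR_platform_op(&op);
    if ( rc == 1 )                     /* one successful operation */
    {
        *val = entry.val;
        return 0;
    }
    return rc < 0 ? rc : entry.u.ret;  /* per-entry error code */
}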
+ */ +#define XENPF_resource_op 61 + +#define XEN_RESOURCE_OP_MSR_READ 0 +#define XEN_RESOURCE_OP_MSR_WRITE 1 + +struct xenpf_resource_entry { + union { + uint32_t cmd; /* IN: XEN_RESOURCE_OP_* */ + int32_t ret; /* OUT: return value for failed entry */ + } u; + uint32_t rsvd; /* IN: padding and must be zero */ + uint64_t idx; /* IN: resource address to access */ + uint64_t val; /* IN/OUT: resource value to set/get */ +}; +typedef struct xenpf_resource_entry xenpf_resource_entry_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_entry_t); + +struct xenpf_resource_op { + uint32_t nr_entries; /* number of resource entry */ + uint32_t cpu; /* which cpu to run */ + XEN_GUEST_HANDLE(xenpf_resource_entry_t) entries; +}; +typedef struct xenpf_resource_op xenpf_resource_op_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_op_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_platform_op(const struct xen_platform_op*); + */ +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime settime; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; + struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; + struct xenpf_change_freq change_freq; + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; + struct xenpf_resource_op resource_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_platform_op xen_platform_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/sched.h xen-4.9.2/extras/mini-os/include/xen/sched.h --- xen-4.9.0/extras/mini-os/include/xen/sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/sched.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,175 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * `incontents 150 sched Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* ` enum sched_op { // SCHEDOP_* => struct sched_* */ +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown_t structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN + * of the guest's start info page. RDX/EDX is the third hypercall + * argument. + * + * In addition, when the reason is SHUTDOWN_suspend, this hypercall + * returns 1 if the suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. + */ +#define SCHEDOP_shutdown 2 + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll_t structure. + */ +#define SCHEDOP_poll 3 + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown_t structure. + */ +#define SCHEDOP_remote_shutdown 4 + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == sched_shutdown_t, as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Set up, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog_t structure. + * With id == 0, set up a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +/* ` } */ + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +/* ` enum sched_shutdown_reason { */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */ +/* ` } */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/sysctl.h xen-4.9.2/extras/mini-os/include/xen/sysctl.h --- xen-4.9.0/extras/mini-os/include/xen/sysctl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/sysctl.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,719 @@ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B + +/* + * Read console content from Xen buffer ring. + */ +/* XEN_SYSCTL_readconsole */ +struct xen_sysctl_readconsole { + /* IN: Non-zero -> clear after reading. */ + uint8_t clear; + /* IN: Non-zero -> start index specified by @index field. */ + uint8_t incremental; + uint8_t pad0, pad1; + /* + * IN: Start index for consuming from ring buffer (if @incremental); + * OUT: End index after consuming from ring buffer. + */ + uint32_t index; + /* IN: Virtual address to write console data. */ + XEN_GUEST_HANDLE_64(char) buffer; + /* IN: Size of buffer; OUT: Bytes written to buffer. */ + uint32_t count; +}; +typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); + +/* Get trace buffers machine base address */ +/* XEN_SYSCTL_tbuf_op */ +struct xen_sysctl_tbuf_op { + /* IN variables */ +#define XEN_SYSCTL_TBUFOP_get_info 0 +#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 +#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 +#define XEN_SYSCTL_TBUFOP_set_size 3 +#define XEN_SYSCTL_TBUFOP_enable 4 +#define XEN_SYSCTL_TBUFOP_disable 5 + uint32_t cmd; + /* IN/OUT variables */ + struct xenctl_bitmap cpu_mask; + uint32_t evt_mask; + /* OUT variables */ + uint64_aligned_t buffer_mfn; + uint32_t size; /* Also an IN variable! */ +}; +typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); + +/* + * Get physical information about the host machine + */ +/* XEN_SYSCTL_physinfo */ + /* (x86) The platform supports HVM guests. */ +#define _XEN_SYSCTL_PHYSCAP_hvm 0 +#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) + /* (x86) The platform supports HVM-guest direct access to I/O devices. */ +#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 +#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ + uint32_t cpu_khz; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint64_aligned_t outstanding_pages; + uint32_t hw_cap[8]; + + /* XEN_SYSCTL_PHYSCAP_??? */ + uint32_t capabilities; +}; +typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); + +/* + * Get the ID of the current scheduler. + */ +/* XEN_SYSCTL_sched_id */ +struct xen_sysctl_sched_id { + /* OUT variable */ + uint32_t sched_id; +}; +typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); + +/* Interface for controlling Xen software performance counters. */ +/* XEN_SYSCTL_perfc_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ +#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. 
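From the toolstack, XEN_SYSCTL_readconsole earlier in this header is normally reached through libxc. A hedged sketch, assuming libxc's xc_readconsolering() wrapper (the same path "xl dmesg" ultimately takes):

#include <xenctrl.h>
#include <stdio.h>

/* Dump the hypervisor console ring without clearing it. */
static int dump_xen_console(xc_interface *xch)
{
    char buf[16384];
    unsigned int nr = sizeof(buf);  /* IN: buffer size; OUT: bytes read */
    uint32_t index = 0;             /* resume point for incremental reads */

    if ( xc_readconsolering(xch, buf, &nr, 0 /* clear */,
                            1 /* incremental */, &index) )
        return -1;

    fwrite(buf, 1, nr, stdout);
    return 0;
}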
*/ +struct xen_sysctl_perfc_desc { + char name[80]; /* name of perf counter */ + uint32_t nr_vals; /* number of values for this counter */ +}; +typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); +typedef uint32_t xen_sysctl_perfc_val_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); + +struct xen_sysctl_perfc_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ + /* OUT variables. */ + uint32_t nr_counters; /* number of counters description */ + uint32_t nr_vals; /* number of values */ + /* counter information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + /* counter values (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; +}; +typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); + +/* XEN_SYSCTL_getdomaininfolist */ +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; +typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); + +/* Inject debug keys into Xen. */ +/* XEN_SYSCTL_debug_keys */ +struct xen_sysctl_debug_keys { + /* IN variables. */ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +/* XEN_SYSCTL_getcpuinfo */ +struct xen_sysctl_cpuinfo { + uint64_aligned_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + +/* XEN_SYSCTL_availheap */ +struct xen_sysctl_availheap { + /* IN variables. */ + uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ + uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ + int32_t node; /* NUMA node of interest (-1 for all nodes). */ + /* OUT variables. */ + uint64_aligned_t avail_bytes;/* Bytes available in the specified region. 
*/ +}; +typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); + +/* XEN_SYSCTL_get_pmstat */ +struct pm_px_val { + uint64_aligned_t freq; /* Px core frequency */ + uint64_aligned_t residency; /* Px residency time */ + uint64_aligned_t count; /* Px transition count */ +}; +typedef struct pm_px_val pm_px_val_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); + +struct pm_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ + XEN_GUEST_HANDLE_64(pm_px_val_t) pt; +}; +typedef struct pm_px_stat pm_px_stat_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); + +struct pm_cx_stat { + uint32_t nr; /* entry nr in triggers & residencies, including C0 */ + uint32_t last; /* last Cx state */ + uint64_aligned_t idle_time; /* idle time from boot */ + XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ + XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ + uint32_t nr_pc; /* entry nr in pc[] */ + uint32_t nr_cc; /* entry nr in cc[] */ + /* + * These two arrays may (and generally will) have unused slots; slots not + * having a corresponding hardware register will not be written by the + * hypervisor. It is therefore up to the caller to put a suitable sentinel + * into all slots before invoking the function. + * Indexing is 1-biased (PC1/CC1 being at index 0). + */ + XEN_GUEST_HANDLE_64(uint64) pc; + XEN_GUEST_HANDLE_64(uint64) cc; +}; + +struct xen_sysctl_get_pmstat { +#define PMSTAT_CATEGORY_MASK 0xf0 +#define PMSTAT_PX 0x10 +#define PMSTAT_CX 0x20 +#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) +#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) +#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) +#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) +#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) +#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) + uint32_t type; + uint32_t cpuid; + union { + struct pm_px_stat getpx; + struct pm_cx_stat getcx; + /* other struct for tx, etc */ + } u; +}; +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); + +/* XEN_SYSCTL_cpu_hotplug */ +struct xen_sysctl_cpu_hotplug { + /* IN variables */ + uint32_t cpu; /* Physical cpu. */ +#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 +#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 + uint32_t op; /* hotplug opcode */ +}; +typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); + +/* + * Get/set xen power management, include + * 1. 
cpufreq governors and related parameters + */ +/* XEN_SYSCTL_pm_op */ +struct xen_userspace { + uint32_t scaling_setspeed; +}; +typedef struct xen_userspace xen_userspace_t; + +struct xen_ondemand { + uint32_t sampling_rate_max; + uint32_t sampling_rate_min; + + uint32_t sampling_rate; + uint32_t up_threshold; +}; +typedef struct xen_ondemand xen_ondemand_t; + +/* + * cpufreq para name of this structure named + * same as sysfs file name of native linux + */ +#define CPUFREQ_NAME_LEN 16 +struct xen_get_cpufreq_para { + /* IN/OUT variable */ + uint32_t cpu_num; + uint32_t freq_num; + uint32_t gov_num; + + /* for all governors */ + /* OUT variable */ + XEN_GUEST_HANDLE_64(uint32) affected_cpus; + XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; + XEN_GUEST_HANDLE_64(char) scaling_available_governors; + char scaling_driver[CPUFREQ_NAME_LEN]; + + uint32_t cpuinfo_cur_freq; + uint32_t cpuinfo_max_freq; + uint32_t cpuinfo_min_freq; + uint32_t scaling_cur_freq; + + char scaling_governor[CPUFREQ_NAME_LEN]; + uint32_t scaling_max_freq; + uint32_t scaling_min_freq; + + /* for specific governor */ + union { + struct xen_userspace userspace; + struct xen_ondemand ondemand; + } u; + + int32_t turbo_enabled; +}; + +struct xen_set_cpufreq_gov { + char scaling_governor[CPUFREQ_NAME_LEN]; +}; + +struct xen_set_cpufreq_para { + #define SCALING_MAX_FREQ 1 + #define SCALING_MIN_FREQ 2 + #define SCALING_SETSPEED 3 + #define SAMPLING_RATE 4 + #define UP_THRESHOLD 5 + + uint32_t ctrl_type; + uint32_t ctrl_value; +}; + +struct xen_sysctl_pm_op { + #define PM_PARA_CATEGORY_MASK 0xf0 + #define CPUFREQ_PARA 0x10 + + /* cpufreq command type */ + #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) + #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) + #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) + #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) + + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + + /* cpuidle max_cstate access command */ + #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + + /* enable/disable turbo mode when in dbs governor */ + #define XEN_SYSCTL_pm_op_enable_turbo 0x26 + #define XEN_SYSCTL_pm_op_disable_turbo 0x27 + + uint32_t cmd; + uint32_t cpuid; + union { + struct xen_get_cpufreq_para get_para; + struct xen_set_cpufreq_gov set_gov; + struct xen_set_cpufreq_para set_para; + uint64_aligned_t get_avgfreq; + uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; + } u; +}; + +/* XEN_SYSCTL_page_offline_op */ +struct xen_sysctl_page_offline_op { + /* IN: range of page to be offlined */ +#define sysctl_page_offline 1 +#define sysctl_page_online 2 +#define sysctl_query_page_offline 3 + uint32_t cmd; + uint32_t start; + uint32_t end; + /* OUT: result of page offline request */ + /* + * bit 0~15: result flags + * bit 16~31: owner + */ + XEN_GUEST_HANDLE(uint32) status; +}; + +#define PG_OFFLINE_STATUS_MASK (0xFFUL) + +/* The result is invalid, i.e. 
HV does not handle it */ +#define PG_OFFLINE_INVALID (0x1UL << 0) + +#define PG_OFFLINE_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_PENDING (0x1UL << 2) +#define PG_OFFLINE_FAILED (0x1UL << 3) +#define PG_OFFLINE_AGAIN (0x1UL << 4) + +#define PG_ONLINE_FAILED PG_OFFLINE_FAILED +#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED + +#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) +#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) +#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) + +#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) + +/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ +#define PG_OFFLINE_XENPAGE (0x1UL << 8) +#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) +#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) +#define PG_OFFLINE_OWNED (0x1UL << 12) + +#define PG_OFFLINE_BROKEN (0x1UL << 13) +#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN + +#define PG_OFFLINE_OWNER_SHIFT 16 + +/* XEN_SYSCTL_lockprof_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ +#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ +/* Record-type: */ +#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ +#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ +#define LOCKPROF_TYPE_N 2 /* number of types */ +struct xen_sysctl_lockprof_data { + char name[40]; /* lock name (may include up to 2 %d specifiers) */ + int32_t type; /* LOCKPROF_TYPE_??? */ + int32_t idx; /* index (e.g. domain id) */ + uint64_aligned_t lock_cnt; /* # of successful lock acquisitions */ + uint64_aligned_t block_cnt; /* # of waits for the lock */ + uint64_aligned_t lock_time; /* nsecs lock held */ + uint64_aligned_t block_time; /* nsecs waited for lock */ +}; +typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); +struct xen_sysctl_lockprof_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ + uint32_t max_elem; /* size of output buffer */ + /* OUT variables (query only). */ + uint32_t nr_elem; /* number of elements available */ + uint64_aligned_t time; /* nsecs of profile measurement */ + /* profile information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; +}; +typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); + +/* XEN_SYSCTL_topologyinfo */ +#define INVALID_TOPOLOGY_ID (~0U) +struct xen_sysctl_topologyinfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + * If OUT is less than IN then the array tails are not written by sysctl. + */ + uint32_t max_cpu_index; + + /* + * If not NULL, these arrays are filled with core/socket/node identifiers + * for each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written to each array.
+ * The number of array elements written by the sysctl is: + * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +/* XEN_SYSCTL_numainfo */ +#define INVALID_NUMAINFO_ID (~0U) +struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest node identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + */ + uint32_t max_node_index; + + /* NB. Entries are 0 if node is not present. */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + /* + * Array, of size (max_node_index+1)^2, listing memory access distances + * between nodes. If an entry has no node distance information (e.g., node + * not present) then the value ~0u is written. + * + * Note that the array rows must be indexed by multiplying by the minimum + * of the caller-provided max_node_index and the returned value of + * max_node_index. That is, if the largest node index in the system is + * smaller than the caller can handle, a smaller 2-d array is constructed + * within the space provided by the caller. When this occurs, trailing + * space provided by the caller is not modified. If the largest node index + * in the system is larger than the caller can handle, then a 2-d array of + * the maximum size handleable by the caller is constructed. + */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + +/* XEN_SYSCTL_cpupool_op */ +#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ +#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ +#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ +#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ +#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ +#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ +#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ +#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF +struct xen_sysctl_cpupool_op { + uint32_t op; /* IN */ + uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ + uint32_t sched_id; /* IN: C OUT: I */ + uint32_t domid; /* IN: M */ + uint32_t cpu; /* IN: AR */ + uint32_t n_dom; /* OUT: I */ + struct xenctl_bitmap cpumap; /* OUT: IF */ +}; +typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); + +#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 +/* + * This structure is used to pass a new ARINC653 schedule from a + * privileged domain (ie dom0) to Xen. + */ +struct xen_sysctl_arinc653_schedule { + /* major_frame holds the time for the new schedule's major frame + * in nanoseconds. */ + uint64_aligned_t major_frame; + /* num_sched_entries holds how many of the entries in the + * sched_entries[] array are valid. */ + uint8_t num_sched_entries; + /* The sched_entries array holds the actual schedule entries. */ + struct { + /* dom_handle must match a domain's UUID */ + xen_domain_handle_t dom_handle; + /* If a domain has multiple VCPUs, vcpu_id specifies which one + * this schedule entry applies to. It should be set to 0 if + * there is only one VCPU for the domain. */ + unsigned int vcpu_id; + /* runtime specifies the amount of time that should be allocated + * to this VCPU per major frame. 
It is specified in nanoseconds */ + uint64_aligned_t runtime; + } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; +}; +typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); + +struct xen_sysctl_credit_schedule { + /* Length of timeslice in milliseconds */ +#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 +#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 + unsigned tslice_ms; + /* Rate limit (minimum timeslice) in microseconds */ +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 + unsigned ratelimit_us; +}; +typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); + +/* XEN_SYSCTL_scheduler_op */ +/* Set or get info? */ +#define XEN_SYSCTL_SCHEDOP_putinfo 0 +#define XEN_SYSCTL_SCHEDOP_getinfo 1 +struct xen_sysctl_scheduler_op { + uint32_t cpupool_id; /* Cpupool whose scheduler is to be targeted. */ + uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ + uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ + union { + struct xen_sysctl_sched_arinc653 { + XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; + } sched_arinc653; + struct xen_sysctl_credit_schedule sched_credit; + } u; +}; +typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); + +/* XEN_SYSCTL_coverage_op */ +/* + * Get total size of information, to help allocate + * the buffer. The pointer points to a 32 bit value. + */ +#define XEN_SYSCTL_COVERAGE_get_total_size 0 + +/* + * Read coverage information in a single run + * You must use a tool to split them. + */ +#define XEN_SYSCTL_COVERAGE_read 1 + +/* + * Reset all the coverage counters to 0 + * No parameters. + */ +#define XEN_SYSCTL_COVERAGE_reset 2 + +/* + * Like XEN_SYSCTL_COVERAGE_read but also resets the + * counters to 0 in a single call. 
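(The coverage interface is a two-step protocol: query the total size, then read the whole blob. A hedged sketch follows, using the xen_sysctl_coverage_op structure defined just below; do_sysctl() is again a hypothetical hypercall wrapper.)

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

uint8_t *read_coverage(uint32_t *size_out)
{
    struct xen_sysctl op;
    uint8_t *buf;

    memset(&op, 0, sizeof(op));
    op.cmd = XEN_SYSCTL_coverage_op;
    op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
    op.u.coverage_op.cmd = XEN_SYSCTL_COVERAGE_get_total_size;
    if ( do_sysctl(&op) )                       /* assumed wrapper */
        return NULL;
    *size_out = op.u.coverage_op.u.total_size;  /* OUT from step 1 */

    buf = malloc(*size_out);
    if ( !buf )
        return NULL;
    op.u.coverage_op.cmd = XEN_SYSCTL_COVERAGE_read;
    set_xen_guest_handle(op.u.coverage_op.u.raw_info, buf);
    if ( do_sysctl(&op) )
    {
        free(buf);
        return NULL;
    }
    return buf;  /* one opaque blob; a tool must split the records */
}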
+ */ +#define XEN_SYSCTL_COVERAGE_read_and_reset 3 + +struct xen_sysctl_coverage_op { + uint32_t cmd; /* XEN_SYSCTL_COVERAGE_* */ + union { + uint32_t total_size; /* OUT */ + XEN_GUEST_HANDLE_64(uint8) raw_info; /* OUT */ + } u; +}; +typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t); + +#define XEN_SYSCTL_PSR_CMT_get_total_rmid 0 +#define XEN_SYSCTL_PSR_CMT_get_l3_upscaling_factor 1 +/* The L3 cache size is returned in KB units */ +#define XEN_SYSCTL_PSR_CMT_get_l3_cache_size 2 +#define XEN_SYSCTL_PSR_CMT_enabled 3 +#define XEN_SYSCTL_PSR_CMT_get_l3_event_mask 4 +struct xen_sysctl_psr_cmt_op { + uint32_t cmd; /* IN: XEN_SYSCTL_PSR_CMT_* */ + uint32_t flags; /* padding variable, may be extended for future use */ + union { + uint64_t data; /* OUT */ + struct { + uint32_t cpu; /* IN */ + uint32_t rsvd; + } l3_cache; + } u; +}; +typedef struct xen_sysctl_psr_cmt_op xen_sysctl_psr_cmt_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cmt_op_t); + +struct xen_sysctl { + uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 +#define XEN_SYSCTL_coverage_op 20 +#define XEN_SYSCTL_psr_cmt_op 21 + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_readconsole readconsole; + struct xen_sysctl_tbuf_op tbuf_op; + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; + struct xen_sysctl_sched_id sched_id; + struct xen_sysctl_perfc_op perfc_op; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; + struct xen_sysctl_availheap availheap; + struct xen_sysctl_get_pmstat get_pmstat; + struct xen_sysctl_cpu_hotplug cpu_hotplug; + struct xen_sysctl_pm_op pm_op; + struct xen_sysctl_page_offline_op page_offline; + struct xen_sysctl_lockprof_op lockprof_op; + struct xen_sysctl_cpupool_op cpupool_op; + struct xen_sysctl_scheduler_op scheduler_op; + struct xen_sysctl_coverage_op coverage_op; + struct xen_sysctl_psr_cmt_op psr_cmt_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/tmem.h xen-4.9.2/extras/mini-os/include/xen/tmem.h --- xen-4.9.0/extras/mini-os/include/xen/tmem.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/tmem.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,152 @@ +/****************************************************************************** + * tmem.h + * + * Guest OS interface to Xen Transcendent Memory. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_TMEM_H__ +#define __XEN_PUBLIC_TMEM_H__ + +#include "xen.h" + +/* version of ABI */ +#define TMEM_SPEC_VERSION 1 + +/* Commands to HYPERVISOR_tmem_op() */ +#define TMEM_CONTROL 0 +#define TMEM_NEW_POOL 1 +#define TMEM_DESTROY_POOL 2 +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define TMEM_NEW_PAGE 3 +#define TMEM_READ 8 +#define TMEM_WRITE 9 +#define TMEM_XCHG 10 +#endif + +/* Privileged commands to HYPERVISOR_tmem_op() */ +#define TMEM_AUTH 101 +#define TMEM_RESTORE_NEW 102 + +/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ +#define TMEMC_THAW 0 +#define TMEMC_FREEZE 1 +#define TMEMC_FLUSH 2 +#define TMEMC_DESTROY 3 +#define TMEMC_LIST 4 +#define TMEMC_SET_WEIGHT 5 +#define TMEMC_SET_CAP 6 +#define TMEMC_SET_COMPRESS 7 +#define TMEMC_QUERY_FREEABLE_MB 8 +#define TMEMC_SAVE_BEGIN 10 +#define TMEMC_SAVE_GET_VERSION 11 +#define TMEMC_SAVE_GET_MAXPOOLS 12 +#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 +#define TMEMC_SAVE_GET_CLIENT_CAP 14 +#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 +#define TMEMC_SAVE_GET_POOL_FLAGS 16 +#define TMEMC_SAVE_GET_POOL_NPAGES 17 +#define TMEMC_SAVE_GET_POOL_UUID 18 +#define TMEMC_SAVE_GET_NEXT_PAGE 19 +#define TMEMC_SAVE_GET_NEXT_INV 20 +#define TMEMC_SAVE_END 21 +#define TMEMC_RESTORE_BEGIN 30 +#define TMEMC_RESTORE_PUT_PAGE 32 +#define TMEMC_RESTORE_FLUSH_PAGE 33 + +/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PRECOMPRESSED 4 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_POOL_PAGESIZE_MASK 0xf +#define TMEM_POOL_VERSION_SHIFT 24 +#define TMEM_POOL_VERSION_MASK 0xff +#define TMEM_POOL_RESERVED_BITS 0x00ffff00 + +/* Bits for client flags (save/restore) */ +#define TMEM_CLIENT_COMPRESS 1 +#define TMEM_CLIENT_FROZEN 2 + +/* Special errno values */ +#define EFROZEN 1000 +#define EEMPTY 1001 + + +#ifndef __ASSEMBLY__ +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +typedef xen_pfn_t tmem_cli_mfn_t; +#endif +typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; +struct tmem_op { + uint32_t cmd; + int32_t pool_id; + union { + struct { + uint64_t uuid[2]; + uint32_t flags; + uint32_t arg1; + } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ + struct { + uint32_t subop; + uint32_t cli_id; + uint32_t arg1; + uint32_t arg2; + uint64_t oid[3]; + 
tmem_cli_va_t buf; + } ctrl; /* for cmd == TMEM_CONTROL */ + struct { + + uint64_t oid[3]; + uint32_t index; + uint32_t tmem_offset; + uint32_t pfn_offset; + uint32_t len; + xen_pfn_t cmfn; /* client machine page frame */ + } gen; /* for all other cmd ("generic") */ + } u; +}; +typedef struct tmem_op tmem_op_t; +DEFINE_XEN_GUEST_HANDLE(tmem_op_t); + +struct tmem_handle { + uint32_t pool_id; + uint32_t index; + uint64_t oid[3]; +}; +#endif + +#endif /* __XEN_PUBLIC_TMEM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/trace.h xen-4.9.2/extras/mini-os/include/xen/trace.h --- xen-4.9.0/extras/mini-os/include/xen/trace.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/trace.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,331 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +#define TRACE_EXTRA_MAX 7 +#define TRACE_EXTRA_SHIFT 28 + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_PV 0x0020f000 /* Xen PV traces */ +#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ +#define TRC_ALL 0x0ffff000 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) +#define TRC_HD_CYCLE_FLAG (1UL<<31) +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) +#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ +#define TRC_HVM_EMUL 0x00084000 /* emulated devices */ + +#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ +#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ + +/* + * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are + * reserved for encoding what scheduler produced the information. The + * actual event is encoded in the last 9 bits. + * + * This means we have 8 scheduling IDs available (which means at most 8 + * schedulers generating events) and, in each scheduler, up to 512 + * different events. + */ +#define TRC_SCHED_ID_BITS 3 +#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS) +#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS)-1)<<TRC_SCHED_ID_SHIFT) [... a large span of trace.h was lost in extraction here (everything between this "<<" and a later ">" was swallowed as if it were an HTML tag): the per-scheduler IDs, the TRC_* event definitions, and the trace record layout. Only the tail of the final comment, which describes how trace consumers locate each CPU's MFN list via t_info->cpu_offset[cpu], survives below ...] t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/vcpu.h xen-4.9.2/extras/mini-os/include/xen/vcpu.h --- xen-4.9.0/extras/mini-os/include/xen/vcpu.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/vcpu.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,240 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long vcpu_op(int cmd, unsigned int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practice to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state). + */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. 
+ * Notes: + * 1. The registered address may be virtual or physical or a guest handle, + * depending on the platform. A virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID information is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. + * This command returns -EINVAL if it is not a valid operation for this VCPU. 
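(A short illustration of the single-shot timer interface above: arming a timer roughly a millisecond ahead. HYPERVISOR_vcpu_op() is the guest-side wrapper of this shape that mini-os declares; now_ns is assumed to come from the guest's system-time code.)

/* Hedged sketch: request a VIRQ_TIMER upcall ~1 ms from now. */
static int arm_oneshot(int vcpu, uint64_t now_ns)
{
    struct vcpu_set_singleshot_timer single = {
        .timeout_abs_ns = now_ns + 1000000ULL, /* absolute, in ns */
        .flags = VCPU_SSHOTTMR_future,         /* -ETIME if already past */
    };
    return HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &single);
}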
+ */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); + +#endif /* __XEN_PUBLIC_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/version.h xen-4.9.2/extras/mini-os/include/xen/version.h --- xen-4.9.0/extras/mini-os/include/xen/version.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/version.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,96 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Nguyen Anh Quynh + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +#include "xen.h" + +/* NB. 
All ops return zero on success, except XENVER_{version,pagesize} */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; +typedef struct xen_compile_info xen_compile_info_t; + +#define XENVER_capabilities 3 +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + xen_ulong_t virt_start; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; +typedef struct xen_feature_info xen_feature_info_t; + +/* Declares the features reported by XENVER_get_features. */ +#include "features.h" + +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + +#endif /* __XEN_PUBLIC_VERSION_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/xencomm.h xen-4.9.2/extras/mini-os/include/xen/xencomm.h --- xen-4.9.0/extras/mini-os/include/xen/xencomm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/xencomm.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 + */ + +#ifndef _XEN_XENCOMM_H_ +#define _XEN_XENCOMM_H_ + +/* A xencomm descriptor is a scatter/gather list containing physical + * addresses corresponding to a virtually contiguous memory area. The + * hypervisor translates these physical addresses to machine addresses to copy + * to and from the virtually contiguous area. 
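(To make the descriptor concrete: a hedged sketch that wraps a page-aligned, virtually contiguous buffer in a xencomm_desc, using the XENCOMM_MAGIC constant defined just below. virt_to_phys() is an assumed guest helper, and real implementations also cope with unaligned buffers and sub-page offsets.)

#include <stdlib.h>

/* Hedged sketch: build a xencomm scatter/gather list for buf[0..len). */
struct xencomm_desc *xencomm_wrap(void *buf, unsigned long len,
                                  unsigned long page_size)
{
    unsigned long i, nr = (len + page_size - 1) / page_size;
    struct xencomm_desc *desc =
        malloc(sizeof(*desc) + nr * sizeof(desc->address[0]));

    if ( !desc )
        return NULL;
    desc->magic = XENCOMM_MAGIC;
    desc->nr_addrs = nr;
    for ( i = 0; i < nr; i++ )  /* one physical address per page */
        desc->address[i] = virt_to_phys((char *)buf + i * page_size);
    return desc;
}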
+ */ + +#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ +#define XENCOMM_INVALID (~0UL) + +struct xencomm_desc { + uint32_t magic; + uint32_t nr_addrs; /* the number of entries in address[] */ + uint64_t address[0]; +}; + +#endif /* _XEN_XENCOMM_H_ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/xen-compat.h xen-4.9.2/extras/mini-os/include/xen/xen-compat.h --- xen-4.9.0/extras/mini-os/include/xen/xen-compat.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/xen-compat.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,44 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/xen.h xen-4.9.2/extras/mini-os/include/xen/xen.h --- xen-4.9.0/extras/mini-os/include/xen/xen.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/xen.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,899 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include "xen-compat.h" + +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__arm__) || defined (__aarch64__) +#include "arch-arm.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +#if __XEN_INTERFACE_VERSION__ < 0x00040300 +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +#endif +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); +#endif + +/* + * HYPERCALLS + */ + +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* ` } */ + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. 
*/ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +/* ` enum virq { */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 +/* ` } */ + +#define NR_VIRQS 24 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @done_out is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 + * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. 
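(A hedged sketch of a single MMU_NORMAL_PT_UPDATE request matching the encoding above: the low two bits of ptr select the sub-command. struct mmu_update is the (ptr, val) pair defined further down in this header; HYPERVISOR_mmu_update() is the guest wrapper of this shape that mini-os declares; the caller is assumed to know the machine address of the PTE.)

/* Hedged sketch: rewrite one PTE via Xen rather than directly. */
static int update_pte(uint64_t pte_maddr, uint64_t new_val)
{
    struct mmu_update req = {
        .ptr = pte_maddr | MMU_NORMAL_PT_UPDATE, /* ptr[1:0] = sub-command */
        .val = new_val,                          /* MFN plus attribute bits */
    };
    int done = 0;
    return HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF);
}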
+ * + * There are also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping a pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very diligent in following these rules: + * 1). Start with the top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there are fewer pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. If so desired, the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. Meaning that + * if this is an X86_64 machine and the four-level page table layout is used, + * the layout of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bit that does not fit with the default layout is PAGE_PSE + * (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as a mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. 
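(Step 5 of the recipe above as a hedged sketch: pin the fully read-only L4, then point cr3 at it, batched in one hypercall. struct mmuext_op appears a little further down in this header; HYPERVISOR_mmuext_op() is the guest wrapper of this shape that mini-os declares; l4_mfn is the machine frame number of the new top-level table.)

/* Hedged sketch: pin a new top-level table and switch to it. */
static int install_new_l4(xen_pfn_t l4_mfn)
{
    struct mmuext_op ops[2] = {
        { .cmd = MMUEXT_PIN_L4_TABLE, .arg1.mfn = l4_mfn },
        { .cmd = MMUEXT_NEW_BASEPTR,  .arg1.mfn = l4_mfn },
    };
    int done = 0;
    return HYPERVISOR_mmuext_op(ops, 2, &done, DOMID_SELF);
}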
+ * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows setting the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of the val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 is turned on, + * then we are using PAT1; if bit 3 and bit 4 are on, then PAT3. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen, which means + * that a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages MUST use the PAT4 entry, meaning that Bit 7 (PAGE_PAT) is + * set. For example, Linux only uses PAT0, PAT1, and PAT2 for + * caching, as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit like so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ + +/* + * MMU EXTENDED OPERATIONS + * + * ` enum neg_errnoval + * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * ` unsigned int count, + * ` unsigned int *pdone, + * ` unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. 
Writes back and flushes cache contents + * on all CPUs in the system. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. + */ +/* ` enum mmuext_cmd { */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 +/* ` } */ + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; /* => enum mmuext_cmd */ + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val, + * ` enum uvm_flags flags) + * ` + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val, + * ` enum uvm_flags flags, + * ` domid_t domid) + * ` + * ` @va: The virtual address whose mapping we want to change + * ` @val: The new page table entry, must contain a machine address + * ` @flags: Control TLB flushes + */ +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +/* ` enum uvm_flags { */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +/* ` } */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. 
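(Rounding off the update_va_mapping flags defined a little above with a hedged one-liner: change a mapping by virtual address and flush only that entry from the local TLB. HYPERVISOR_update_va_mapping() is the guest wrapper of this shape that mini-os declares; the exact PTE type varies by port, and val must carry a machine address.)

/* Hedged sketch: remap one VA with a targeted local TLB flush. */
static inline int remap_va(unsigned long va, uint64_t pte_val)
{
    return HYPERVISOR_update_va_mapping(va, pte_val,
                                        UVMF_INVLPG | UVMF_LOCAL);
}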
+ */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_multicall(multicall_entry_t call_list[], + * ` uint32_t nr_calls); + * + * NB. The fields are logically the natural register size for this + * architecture. In cases where xen_ulong_t is larger than this then + * any unused bits in the upper portion must be zero. + */ +struct multicall_entry { + xen_ulong_t op, result; + xen_ulong_t args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +/* + * Event channel endpoints per domain (when using the 2-level ABI): + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS +#endif + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. 
*/ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; +#ifdef XEN_HAVE_PV_UPCALL_MASK + uint8_t evtchn_upcall_mask; +#else /* XEN_HAVE_PV_UPCALL_MASK */ + uint8_t pad0; +#endif /* XEN_HAVE_PV_UPCALL_MASK */ + xen_ulong_t evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif + +/* + * `incontents 200 startofday_shared Start-of-day shared data structure + * Xen/kernel shared data -- pointer provided in start_info. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. + */ +struct shared_info { + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and advertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. 
PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + + struct arch_shared_info arch; + +}; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif + +/* + * `incontents 200 startofday Start-of-day memory layout + * + * 1. The domain is started within contiguous virtual-memory region. + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This is the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base and CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 8. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + * + * Note: Prior to 25833:bb85bbccb1c9 ("x86/32-on-64 adjust Dom0 initial page + * table layout") a bug caused the pt_base (3.e above) and cr3 to not point + * to the start of the guest page tables (it was offset by two pages). + * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU + * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got + * allocated in the order: 'first L1','first L2', 'first L3', so the offset + * to the page table base is by two pages back. The initial domain, if it is + * 32-bit and runs under a 64-bit hypervisor, should _NOT_ use the two + * pages preceding pt_base and should mark them as reserved/unused. + */ +#ifdef XEN_HAVE_PV_GUEST_ENTRY +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen-<version>-<platform>". */ + unsigned long nr_pages; /* Total pages allocated to this domain. 
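(The two-level event-channel scan described in the comment above, as a hedged sketch. xchg(), ffsl() and handle_port() are assumed primitives, and real handlers also atomically clear each pending bit in shared memory before dispatching; this only shows the selector-word walk.)

/* Hedged sketch: drain pending, unmasked events for one vCPU. */
static void scan_events(struct shared_info *s, struct vcpu_info *v)
{
    xen_ulong_t sel = xchg(&v->evtchn_pending_sel, 0); /* claim selector */

    while ( sel )
    {
        unsigned int word = ffsl(sel) - 1;  /* selector bit -> word index */
        xen_ulong_t todo;

        sel &= sel - 1;                     /* clear lowest set bit */
        todo = s->evtchn_pending[word] & ~s->evtchn_mask[word];
        while ( todo )
        {
            unsigned int bit = ffsl(todo) - 1;

            todo &= todo - 1;
            handle_port(word * sizeof(xen_ulong_t) * 8 + bit); /* assumed */
        }
    }
}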
*/ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ +#define MAX_GUEST_CMDLINE 1024 + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ +}; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif +#endif /* XEN_HAVE_PV_GUEST_ENTRY */ + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; +/* + * `incontents 200 startofday_dom0_console Dom0_console + * + * The console structure in start_info.console.dom0 + * + * This structure includes a variety of information required to + * have a working VGA/VESA console. + */ +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). 
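The termination convention described above (a descriptor with mod_start == 0) can be exercised with a small standalone sketch; the module offsets and the mapping base below are made up:

    #include <stdint.h>
    #include <stdio.h>

    struct xen_multiboot_mod_list {
        uint32_t mod_start, mod_end, cmdline, pad;
    };

    int main(void)
    {
        /* Addresses are offsets from the start of the multiboot module;
         * the list ends at a descriptor whose mod_start is 0. */
        struct xen_multiboot_mod_list mods[] = {
            { 0x1000, 0x4fff, 0x100, 0 },
            { 0x5000, 0x8fff, 0x140, 0 },
            { 0, 0, 0, 0 },                    /* terminator */
        };
        uintptr_t base = 0x80000000u;          /* where the guest mapped it */

        for (int n = 0; mods[n].mod_start != 0; n++)
            printf("module %d: %#lx-%#lx\n", n,
                   (unsigned long)(base + mods[n].mod_start),
                   (unsigned long)(base + mods[n].mod_end));
        return 0;
    }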
*/ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif + +#ifndef __ASSEMBLY__ +struct xenctl_bitmap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_bits; +}; +#endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/xenoprof.h xen-4.9.2/extras/mini-os/include/xen/xenoprof.h --- xen-4.9.0/extras/mini-os/include/xen/xenoprof.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/xenoprof.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,152 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
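A standalone sketch of how a guest console driver would use the vesa_lfb fields above: a pixel's address comes from lfb_base plus row times bytes_per_line, and channel values are packed with the pos/size masks. The mode values below are illustrative, not taken from any real mode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Illustrative 32bpp mode. */
        uint32_t lfb_base = 0xfd000000;
        uint16_t bytes_per_line = 4096, bits_per_pixel = 32;
        uint8_t  red_pos = 16, red_size = 8;
        uint8_t  green_pos = 8, green_size = 8;
        uint8_t  blue_pos = 0, blue_size = 8;

        unsigned x = 100, y = 50;
        uint64_t addr = (uint64_t)lfb_base
                        + (uint64_t)y * bytes_per_line
                        + (uint64_t)x * (bits_per_pixel / 8);

        /* Pack full-intensity white from the per-channel offsets/sizes. */
        uint32_t pixel = (((1u << red_size) - 1) << red_pos)
                       | (((1u << green_size) - 1) << green_pos)
                       | (((1u << blue_size) - 1) << blue_pos);

        printf("pixel (%u,%u) lives at %#llx; white = %#x\n",
               x, y, (unsigned long long)addr, pixel);
        return 0;
    }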
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +#include "xen.h" + +/* + * Commands to HYPERVISOR_xenoprof_op(). + */ +#define XENOPROF_init 0 +#define XENOPROF_reset_active_list 1 +#define XENOPROF_reset_passive_list 2 +#define XENOPROF_set_active 3 +#define XENOPROF_set_passive 4 +#define XENOPROF_reserve_counters 5 +#define XENOPROF_counter 6 +#define XENOPROF_setup_events 7 +#define XENOPROF_enable_virq 8 +#define XENOPROF_start 9 +#define XENOPROF_stop 10 +#define XENOPROF_disable_virq 11 +#define XENOPROF_release_counters 12 +#define XENOPROF_shutdown 13 +#define XENOPROF_get_buffer 14 +#define XENOPROF_set_backtrace 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* PC value that indicates a special code */ +#define XENOPROF_ESCAPE_CODE (~0ULL) +/* Transient events for the xenoprof->oprofile cpu buf */ +#define XENOPROF_TRACE_BEGIN 1 + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +}; +#ifndef __XEN__ +typedef struct xenoprof_buf xenoprof_buf_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif + +struct xenoprof_init { + int32_t num_events; + int32_t is_primary; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +}; +typedef struct xenoprof_init xenoprof_init_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +}; +typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); + +struct xenoprof_counter { + uint32_t ind; + uint64_t count; + uint32_t enabled; + uint32_t event; + uint32_t hypervisor; + uint32_t kernel; + uint32_t user; + uint64_t unit_mask; +}; +typedef struct xenoprof_counter xenoprof_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); + +typedef struct xenoprof_passive { + uint16_t domain_id; + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +} xenoprof_passive_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); + +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/include/xen/xsm/flask_op.h xen-4.9.2/extras/mini-os/include/xen/xsm/flask_op.h --- 
xen-4.9.0/extras/mini-os/include/xen/xsm/flask_op.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xen/xsm/flask_op.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,201 @@ +/* + * This file contains the flask_op hypercall commands and definitions. + * + * Author: George Coker, + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __FLASK_OP_H__ +#define __FLASK_OP_H__ + +#define XEN_FLASK_INTERFACE_VERSION 1 + +struct xen_flask_load { + XEN_GUEST_HANDLE(char) buffer; + uint32_t size; +}; + +struct xen_flask_setenforce { + uint32_t enforcing; +}; + +struct xen_flask_sid_context { + /* IN/OUT: sid to convert to/from string */ + uint32_t sid; + /* IN: size of the context buffer + * OUT: actual size of the output context string + */ + uint32_t size; + XEN_GUEST_HANDLE(char) context; +}; + +struct xen_flask_access { + /* IN: access request */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + uint32_t req; + /* OUT: AVC data */ + uint32_t allowed; + uint32_t audit_allow; + uint32_t audit_deny; + uint32_t seqno; +}; + +struct xen_flask_transition { + /* IN: transition SIDs and class */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + /* OUT: new SID */ + uint32_t newsid; +}; + +struct xen_flask_userlist { + /* IN: starting SID for list */ + uint32_t start_sid; + /* IN: size of user string and output buffer + * OUT: number of SIDs returned */ + uint32_t size; + union { + /* IN: user to enumerate SIDs */ + XEN_GUEST_HANDLE(char) user; + /* OUT: SID list */ + XEN_GUEST_HANDLE(uint32) sids; + } u; +}; + +struct xen_flask_boolean { + /* IN/OUT: numeric identifier for boolean [GET/SET] + * If -1, name will be used and bool_id will be filled in. 
*/ + uint32_t bool_id; + /* OUT: current enforcing value of boolean [GET/SET] */ + uint8_t enforcing; + /* OUT: pending value of boolean [GET/SET] */ + uint8_t pending; + /* IN: new value of boolean [SET] */ + uint8_t new_value; + /* IN: commit new value instead of only setting pending [SET] */ + uint8_t commit; + /* IN: size of boolean name buffer [GET/SET] + * OUT: actual size of name [GET only] */ + uint32_t size; + /* IN: if bool_id is -1, used to find boolean [GET/SET] + * OUT: textual name of boolean [GET only] + */ + XEN_GUEST_HANDLE(char) name; +}; + +struct xen_flask_setavc_threshold { + /* IN */ + uint32_t threshold; +}; + +struct xen_flask_hash_stats { + /* OUT */ + uint32_t entries; + uint32_t buckets_used; + uint32_t buckets_total; + uint32_t max_chain_len; +}; + +struct xen_flask_cache_stats { + /* IN */ + uint32_t cpu; + /* OUT */ + uint32_t lookups; + uint32_t hits; + uint32_t misses; + uint32_t allocations; + uint32_t reclaims; + uint32_t frees; +}; + +struct xen_flask_ocontext { + /* IN */ + uint32_t ocon; + uint32_t sid; + uint64_t low, high; +}; + +struct xen_flask_peersid { + /* IN */ + evtchn_port_t evtchn; + /* OUT */ + uint32_t sid; +}; + +struct xen_flask_relabel { + /* IN */ + uint32_t domid; + uint32_t sid; +}; + +struct xen_flask_op { + uint32_t cmd; +#define FLASK_LOAD 1 +#define FLASK_GETENFORCE 2 +#define FLASK_SETENFORCE 3 +#define FLASK_CONTEXT_TO_SID 4 +#define FLASK_SID_TO_CONTEXT 5 +#define FLASK_ACCESS 6 +#define FLASK_CREATE 7 +#define FLASK_RELABEL 8 +#define FLASK_USER 9 +#define FLASK_POLICYVERS 10 +#define FLASK_GETBOOL 11 +#define FLASK_SETBOOL 12 +#define FLASK_COMMITBOOLS 13 +#define FLASK_MLS 14 +#define FLASK_DISABLE 15 +#define FLASK_GETAVC_THRESHOLD 16 +#define FLASK_SETAVC_THRESHOLD 17 +#define FLASK_AVC_HASHSTATS 18 +#define FLASK_AVC_CACHESTATS 19 +#define FLASK_MEMBER 20 +#define FLASK_ADD_OCONTEXT 21 +#define FLASK_DEL_OCONTEXT 22 +#define FLASK_GET_PEER_SID 23 +#define FLASK_RELABEL_DOMAIN 24 + uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */ + union { + struct xen_flask_load load; + struct xen_flask_setenforce enforce; + /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */ + struct xen_flask_sid_context sid_context; + struct xen_flask_access access; + /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */ + struct xen_flask_transition transition; + struct xen_flask_userlist userlist; + /* FLASK_GETBOOL, FLASK_SETBOOL */ + struct xen_flask_boolean boolean; + struct xen_flask_setavc_threshold setavc_threshold; + struct xen_flask_hash_stats hash_stats; + struct xen_flask_cache_stats cache_stats; + /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */ + struct xen_flask_ocontext ocontext; + struct xen_flask_peersid peersid; + struct xen_flask_relabel relabel; + } u; +}; +typedef struct xen_flask_op xen_flask_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t); + +#endif diff -Nru xen-4.9.0/extras/mini-os/include/xenbus.h xen-4.9.2/extras/mini-os/include/xenbus.h --- xen-4.9.0/extras/mini-os/include/xenbus.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xenbus.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,128 @@ +#ifndef XENBUS_H__ +#define XENBUS_H__ + +#include + +typedef unsigned long xenbus_transaction_t; +#define XBT_NIL ((xenbus_transaction_t)0) + +#ifdef CONFIG_XENBUS +extern uint32_t xenbus_evtchn; + +/* Initialize the XenBus system. 
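How a caller fills xen_flask_op is easiest to see with FLASK_CONTEXT_TO_SID. The sketch below is standalone, so it redeclares only the fields it touches, with a plain pointer standing in for XEN_GUEST_HANDLE(char); a real caller would include the header above and issue the flask_op hypercall, for instance through libxc:

    #include <stdint.h>
    #include <stdio.h>

    #define XEN_FLASK_INTERFACE_VERSION 1
    #define FLASK_CONTEXT_TO_SID 4

    /* Stand-in for xen_flask_sid_context. */
    struct sid_context {
        uint32_t sid;        /* OUT: SID for the given context       */
        uint32_t size;       /* IN: context length, OUT: string size */
        const char *context;
    };

    int main(void)
    {
        const char ctx[] = "system_u:system_r:domU_t";
        struct sid_context req = { 0, sizeof(ctx), ctx };
        uint32_t cmd = FLASK_CONTEXT_TO_SID;
        uint32_t interface_version = XEN_FLASK_INTERFACE_VERSION;

        /* A real caller would place cmd/interface_version/req in a
         * struct xen_flask_op and pass it to Xen; on success, req.sid
         * would be filled in by the hypervisor. */
        printf("cmd=%u ver=%u size=%u context=\"%s\"\n",
               cmd, interface_version, req.size, req.context);
        return 0;
    }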
*/ +void init_xenbus(void); +void get_xenbus(void *p); +#else +#define xenbus_evtchn ~0 + +static inline void init_xenbus(void) +{ +} +static inline void get_xenbus(void *p) +{ +} +#endif + +/* Read the value associated with a path. Returns a malloc'd error + string on failure and sets *value to NULL. On success, *value is + set to a malloc'd copy of the value. */ +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); + +/* Watch event queue */ +struct xenbus_event { + /* Keep these two as this for xs.c */ + char *path; + char *token; + struct xenbus_event *next; +}; +typedef struct xenbus_event *xenbus_event_queue; + +char *xenbus_watch_path_token(xenbus_transaction_t xbt, const char *path, const char *token, xenbus_event_queue *events); +char *xenbus_unwatch_path_token(xenbus_transaction_t xbt, const char *path, const char *token); +extern struct wait_queue_head xenbus_watch_queue; +void xenbus_wait_for_watch(xenbus_event_queue *queue); +char **xenbus_wait_for_watch_return(xenbus_event_queue *queue); +char* xenbus_wait_for_value(const char *path, const char *value, xenbus_event_queue *queue); +char *xenbus_wait_for_state_change(const char* path, XenbusState *state, xenbus_event_queue *queue); +char *xenbus_switch_state(xenbus_transaction_t xbt, const char* path, XenbusState state); + +/* When no token is provided, use a global queue. */ +#define XENBUS_WATCH_PATH_TOKEN "xenbus_watch_path" +extern xenbus_event_queue xenbus_events; +#define xenbus_watch_path(xbt, path) xenbus_watch_path_token(xbt, path, XENBUS_WATCH_PATH_TOKEN, NULL) +#define xenbus_unwatch_path(xbt, path) xenbus_unwatch_path_token(xbt, path, XENBUS_WATCH_PATH_TOKEN) + + +/* Associates a value with a path. Returns a malloc'd error string on + failure. */ +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value); + +struct write_req { + const void *data; + unsigned len; +}; + +/* Send a message to xenbus, in the same fashion as xb_write, and + block waiting for a reply. The reply is malloced and should be + freed by the caller. */ +struct xsd_sockmsg * +xenbus_msg_reply(int type, + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs); + +/* Removes the value associated with a path. Returns a malloc'd error + string on failure. */ +char *xenbus_rm(xenbus_transaction_t xbt, const char *path); + +/* List the contents of a directory. Returns a malloc'd error string + on failure and sets *contents to NULL. On success, *contents is + set to a malloc'd array of pointers to malloc'd strings. The array + is NULL terminated. May block. */ +char *xenbus_ls(xenbus_transaction_t xbt, const char *prefix, char ***contents); + +/* Reads permissions associated with a path. Returns a malloc'd error + string on failure and sets *value to NULL. On success, *value is + set to a malloc'd copy of the value. */ +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value); + +/* Sets the permissions associated with a path. Returns a malloc'd + error string on failure. */ +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm); + +/* Start a xenbus transaction. Returns the transaction in xbt on + success or a malloc'd error string otherwise. */ +char *xenbus_transaction_start(xenbus_transaction_t *xbt); + +/* End a xenbus transaction. Returns a malloc'd error string if it + fails. abort says whether the transaction should be aborted. + Returns 1 in *retry iff the transaction should be retried. 
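The transaction interface is normally used in a retry loop: start, perform reads and writes, commit, and run the whole body again whenever *retry comes back 1 because another writer raced the transaction. A sketch using the declarations above, assuming a mini-OS guest built with CONFIG_XENBUS and free() available; the store paths are illustrative:

    char *do_update(void)
    {
        xenbus_transaction_t xbt;
        char *err, *value;
        int retry;

        do {
            err = xenbus_transaction_start(&xbt);
            if (err)
                return err;                     /* malloc'd error string */

            err = xenbus_read(xbt, "device/vbd/768/state", &value);
            if (err) {
                free(xenbus_transaction_end(xbt, 1, &retry));  /* abort */
                return err;
            }
            free(value);

            err = xenbus_write(xbt, "data/updated", "1");
            if (err) {
                free(xenbus_transaction_end(xbt, 1, &retry));
                return err;
            }

            err = xenbus_transaction_end(xbt, 0, &retry);      /* commit */
        } while (err == NULL && retry);         /* retry == 1: run again */

        return err;                             /* NULL on success */
    }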
*/ +char *xenbus_transaction_end(xenbus_transaction_t, int abort, + int *retry); + +/* Read path and parse it as an integer. Returns -1 on error. */ +int xenbus_read_integer(const char *path); + +/* Read path and parse it as 16 byte uuid. Returns 1 if + * read and parsing were successful, 0 if not */ +int xenbus_read_uuid(const char* path, unsigned char uuid[16]); + +/* Contraction of snprintf and xenbus_write(path/node). */ +char* xenbus_printf(xenbus_transaction_t xbt, + const char* node, const char* path, + const char* fmt, ...) + __attribute__((__format__(printf, 4, 5))); + +/* Utility function to figure out our domain id */ +domid_t xenbus_get_self_id(void); + +#ifdef CONFIG_XENBUS +/* Reset the XenBus system. */ +void fini_xenbus(void); +#else +static inline void fini_xenbus(void) +{ +} +#endif + +#endif /* XENBUS_H__ */ diff -Nru xen-4.9.0/extras/mini-os/include/xmalloc.h xen-4.9.2/extras/mini-os/include/xmalloc.h --- xen-4.9.0/extras/mini-os/include/xmalloc.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/include/xmalloc.h 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,44 @@ +#ifndef __XMALLOC_H__ +#define __XMALLOC_H__ + +#ifdef HAVE_LIBC + +#include +#include +/* Allocate space for typed object. */ +#define _xmalloc(size, align) memalign(align, size) +#define xfree(ptr) free(ptr) + +#else + +#include + +#define DEFAULT_ALIGN (sizeof(unsigned long)) + +extern void *malloc(size_t size); +extern void *realloc(void *ptr, size_t size); +extern void free(void *ptr); + +/* Free memory from any xmalloc*() call. */ +extern void xfree(const void *); + +/* Underlying functions */ +extern void *_xmalloc(size_t size, size_t align); + +#endif + +static inline void *_xmalloc_array(size_t size, size_t align, size_t num) +{ + /* Check for overflow. */ + if (size && num > UINT_MAX / size) + return NULL; + return _xmalloc(size * num, align); +} + +/* Allocate space for typed object. */ +#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type))) + +/* Allocate space for array of typed objects. */ +#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num)) + +#endif /* __XMALLOC_H__ */ diff -Nru xen-4.9.0/extras/mini-os/kernel.c xen-4.9.2/extras/mini-os/kernel.c --- xen-4.9.0/extras/mini-os/kernel.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/kernel.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,199 @@ +/****************************************************************************** + * kernel.c + * + * Assorted crap goes here, including the initial C entry point, jumped at + * from head.S. + * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * Copyright (c) 2006, Robert Kaiser, FH Wiesbaden + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
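The num > UINT_MAX / size test in _xmalloc_array above is the standard guard against the size * num multiplication wrapping; a standalone illustration, with malloc standing in for _xmalloc:

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void *checked_array_alloc(size_t size, size_t num)
    {
        if (size && num > UINT_MAX / size)   /* size * num would overflow */
            return NULL;
        return malloc(size * num);
    }

    int main(void)
    {
        /* 2^31 objects of 64 bytes: the product wraps a 32-bit counter,
         * so the guard rejects the request instead of returning a short
         * buffer. */
        void *p = checked_array_alloc(64, (size_t)1 << 31);
        printf("huge request: %s\n", p ? "allocated (!)" : "rejected");
        free(p);
        return 0;
    }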
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32]; +char cmdline[MAX_CMDLINE_SIZE]; + +void setup_xen_features(void) +{ + xen_feature_info_t fi; + int i, j; + + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) + { + fi.submap_idx = i; + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) + break; + + for (j=0; j<32; j++) + xen_features[i*32+j] = !!(fi.submap & 1<<j); + } +} diff -Nru xen-4.9.0/extras/mini-os/lib/ctype.c xen-4.9.2/extras/mini-os/lib/ctype.c --- xen-4.9.0/extras/mini-os/lib/ctype.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/ctype.c 2017-02-22 13:09:16.000000000 +0000 +#include + +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ +#endif diff -Nru xen-4.9.0/extras/mini-os/lib/math.c xen-4.9.2/extras/mini-os/lib/math.c --- xen-4.9.0/extras/mini-os/lib/math.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/math.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,426 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: math.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library functions for 64bit arith and other + * from freebsd, files in sys/libkern/ (qdivrem.c, etc) + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * +*/ + +#include +#include +#include + +/* XXX RN: Yuck hardcoded endianess :) */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 + +/* + * From + * @(#)quad.h 8.1 (Berkeley) 6/4/93 + */ + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + quad_t q; /* as a (signed) quad */ + quad_t uq; /* as an unsigned quad */ + int32_t sl[2]; /* as two signed longs */ + uint32_t ul[2]; /* as two unsigned longs */ +}; + +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in an quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#ifndef HAVE_LIBC +#define CHAR_BIT 8 /* number of bits in a char */ +#endif +#define QUAD_BITS (sizeof(quad_t) * CHAR_BIT) +#define LONG_BITS (sizeof(int32_t) * CHAR_BIT) +#define HALF_BITS (sizeof(int32_t) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of int32_t, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be uint32_t.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(int32_t)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + + +/* + * From + * qdivrem.c + */ + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1UL << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((uint32_t)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: */ +typedef uint16_t digit; + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 
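Before the divide itself, the base-B digit machinery is worth seeing in isolation: a 64-bit value becomes four 16-bit "digits" via HHALF/LHALF and is reassembled with COMBINE. A standalone sketch assuming 32-bit longwords (HALF_BITS == 16), exactly as the macros above define them:

    #include <stdint.h>
    #include <stdio.h>

    #define HALF_BITS 16
    #define HHALF(x)  ((x) >> HALF_BITS)
    #define LHALF(x)  ((x) & ((1UL << HALF_BITS) - 1))
    #define COMBINE(a, b) (((uint32_t)(a) << HALF_BITS) | (b))

    int main(void)
    {
        uint64_t uq = 0x123456789abcdef0ULL;
        uint32_t hi = uq >> 32, lo = (uint32_t)uq;
        uint16_t u1 = HHALF(hi), u2 = LHALF(hi);
        uint16_t u3 = HHALF(lo), u4 = LHALF(lo);

        printf("digits: %04x %04x %04x %04x\n", u1, u2, u3, u4);

        /* Reassemble, mirroring how __qdivrem builds its result. */
        uint64_t back = ((uint64_t)COMBINE(u1, u2) << 32) | COMBINE(u3, u4);
        printf("recombined: %#llx\n", (unsigned long long)back);
        return 0;
    }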
+ */ +static void +shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within uint32_t. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u_quad_t +__qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + uint32_t qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + uint32_t rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] 
change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + uint32_t nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * From + * divdi3.c + */ + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +quad_t +__divdi3(quad_t a, quad_t b) +{ + u_quad_t ua, ub, uq; + int neg; + + if (a < 0) + ua = -(u_quad_t)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u_quad_t)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (u_quad_t *)0); + return (neg ? -uq : uq); +} + +/* + * From + * udivdi3.c + */ + +/* + * Divide two unsigned quads. + */ +u_quad_t +__udivdi3(u_quad_t a, u_quad_t b) +{ + return (__qdivrem(a, b, (u_quad_t *)0)); +} + +/* + * From + * umoddi3.c + */ + +/* + * Return remainder after dividing two unsigned quads. + */ +u_quad_t +__umoddi3(u_quad_t a, u_quad_t b) +{ + u_quad_t r; + + (void)__qdivrem(a, b, &r); + return (r); +} + +/* + * From + * moddi3.c + */ + +/* + * Return remainder after dividing two signed quads. + * + * XXX + * If -1/2 should produce -1 on this machine, this code is wrong. + */ +quad_t +__moddi3(quad_t a, quad_t b) +{ + u_quad_t ua, ub, ur; + int neg; + + if (a < 0) + ua = -(u_quad_t)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u_quad_t)b; + else + ub = b; + (void)__qdivrem(ua, ub, &ur); + return (neg ? 
-ur : ur); +} diff -Nru xen-4.9.0/extras/mini-os/lib/printf.c xen-4.9.2/extras/mini-os/lib/printf.c --- xen-4.9.0/extras/mini-os/lib/printf.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/printf.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1235 @@ +/* + **************************************************************************** + * + * File: printf.c + * Author: Juergen Gross + * + * Date: Jun 2016 + * + * Environment: Xen Minimal OS + * Description: Library functions for printing + * (FreeBSD port) + * + **************************************************************************** + */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined HAVE_LIBC + +#include +#include +#include +#include +#include +#include +#include + +#define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +/* 64 bits + 0-Byte at end */ +#define MAXNBUF 65 + +static char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +/* + * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse + * order; return an optional length and a pointer to the last character + * written in the buffer (i.e., the first character of the string). + * The buffer pointed to by `nbuf' must have length >= MAXNBUF. + */ +static char * +ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper) +{ + char *p, c; + + p = nbuf; + *p = '\0'; + do { + c = hex2ascii_data[num % base]; + *++p = upper ? toupper(c) : c; + } while (num /= base); + if (lenp) + *lenp = p - nbuf; + return (p); +} + +/* + * Convert a string to an unsigned long integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. 
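ksprintn's reversed buffer is the detail to notice: nbuf[0] holds a NUL, digits are stored least significant first, and the returned pointer names the most significant digit, so callers print by walking backwards until they hit the NUL. A standalone sketch of the same idea (lowercase only, without the 'upper' handling above):

    #include <stdio.h>

    #define MAXNBUF 65
    static const char hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz";

    static char *ksprintn(char *nbuf, unsigned long long num, int base, int *lenp)
    {
        char *p = nbuf;
        *p = '\0';
        do {
            *++p = hex2ascii_data[num % base];
        } while (num /= base);
        if (lenp)
            *lenp = p - nbuf;
        return p;   /* points at the most significant digit */
    }

    int main(void)
    {
        char nbuf[MAXNBUF];
        int len;
        char *p = ksprintn(nbuf, 3735928559ULL, 16, &len);

        printf("%d digits: ", len);
        while (*p)
            putchar(*p--);   /* walk back towards the NUL at nbuf[0] */
        putchar('\n');       /* prints "deadbeef" */
        return 0;
    }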
+ */ +unsigned long +strtoul(const char *nptr, char **endptr, int base) +{ + const char *s = nptr; + unsigned long acc; + unsigned char c; + unsigned long cutoff; + int neg = 0, any, cutlim; + + /* + * See strtol for comments as to the logic used. + */ + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + cutoff = (unsigned long)ULONG_MAX / (unsigned long)base; + cutlim = (unsigned long)ULONG_MAX % (unsigned long)base; + for (acc = 0, any = 0;; c = *s++) { + if (!isascii(c)) + break; + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = ULONG_MAX; + } else if (neg) + acc = -acc; + if (endptr != 0) + *endptr = __DECONST(char *, any ? s - 1 : nptr); + return (acc); +} + +/* + * Convert a string to a quad integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + */ +int64_t +strtoq(const char *nptr, char **endptr, int base) +{ + const char *s; + uint64_t acc; + unsigned char c; + uint64_t qbase, cutoff; + int neg, any, cutlim; + + /* + * Skip white space and pick up leading +/- sign if any. + * If base is 0, allow 0x for hex and 0 for octal, else + * assume decimal; if base is already 16, allow 0x. + */ + s = nptr; + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else { + neg = 0; + if (c == '+') + c = *s++; + } + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + + /* + * Compute the cutoff value between legal numbers and illegal + * numbers. That is the largest legal value, divided by the + * base. An input number that is greater than this value, if + * followed by a legal input character, is too big. One that + * is equal to this value may be valid or not; the limit + * between valid and invalid numbers is then based on the last + * digit. For instance, if the range for quads is + * [-9223372036854775808..9223372036854775807] and the input base + * is 10, cutoff will be set to 922337203685477580 and cutlim to + * either 7 (neg==0) or 8 (neg==1), meaning that if we have + * accumulated a value > 922337203685477580, or equal but the + * next digit is > 7 (or 8), the number is too big, and we will + * return a range error. + * + * Set any if any `digits' consumed; make it negative to indicate + * overflow. + */ + qbase = (unsigned)base; + cutoff = neg ? (uint64_t)-(LLONG_MIN + LLONG_MAX) + LLONG_MAX : LLONG_MAX; + cutlim = cutoff % qbase; + cutoff /= qbase; + for (acc = 0, any = 0;; c = *s++) { + if (!isascii(c)) + break; + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= qbase; + acc += c; + } + } + if (any < 0) { + acc = neg ? LLONG_MIN : LLONG_MAX; + } else if (neg) + acc = -acc; + if (endptr != 0) + *endptr = __DECONST(char *, any ? s - 1 : nptr); + return (acc); +} + +/* + * Convert a string to an unsigned quad integer. 
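The cutoff/cutlim overflow test the comment above walks through is clearer with concrete numbers; a standalone sketch for unsigned long in base 10, the unsigned analogue of the signed case described there:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        int base = 10;
        unsigned long cutoff = ULONG_MAX / base;   /* last safe accumulator */
        int cutlim = ULONG_MAX % base;             /* last safe final digit */
        unsigned long acc = 0;
        const char *s = "18446744073709551616";    /* ULONG_MAX + 1 on LP64 */
        int overflow = 0;

        for (; *s >= '0' && *s <= '9'; s++) {
            int c = *s - '0';
            if (acc > cutoff || (acc == cutoff && c > cutlim)) {
                overflow = 1;    /* acc * base + c would wrap */
                break;
            }
            acc = acc * base + c;
        }
        if (overflow)
            printf("overflow: clamp to ULONG_MAX\n");
        else
            printf("%lu\n", acc);
        return 0;
    }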
+ * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + */ +uint64_t +strtouq(const char *nptr, char **endptr, int base) +{ + const char *s = nptr; + uint64_t acc; + unsigned char c; + uint64_t qbase, cutoff; + int neg, any, cutlim; + + /* + * See strtoq for comments as to the logic used. + */ + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else { + neg = 0; + if (c == '+') + c = *s++; + } + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + qbase = (unsigned)base; + cutoff = (uint64_t)ULLONG_MAX / qbase; + cutlim = (uint64_t)ULLONG_MAX % qbase; + for (acc = 0, any = 0;; c = *s++) { + if (!isascii(c)) + break; + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= qbase; + acc += c; + } + } + if (any < 0) { + acc = ULLONG_MAX; + } else if (neg) + acc = -acc; + if (endptr != 0) + *endptr = __DECONST(char *, any ? s - 1 : nptr); + return (acc); +} + +/* + * Scaled down version of printf(3). + */ +int +vsnprintf(char *str, size_t size, char const *fmt, va_list ap) +{ +#define PCHAR(c) { if (size >= 2) { *str++ = c; size--; } retval++; } + char nbuf[MAXNBUF]; + const char *p, *percent; + int ch, n; + uintmax_t num; + int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot; + int cflag, hflag, jflag, tflag, zflag; + int dwidth, upper; + char padc; + int stop = 0, retval = 0; + + num = 0; + + if (fmt == NULL) + fmt = "(fmt null)\n"; + + for (;;) { + padc = ' '; + width = 0; + while ((ch = (u_char)*fmt++) != '%' || stop) { + if (ch == '\0') { + if (size >= 1) + *str++ = '\0'; + return (retval); + } + PCHAR(ch); + } + percent = fmt - 1; + qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; + sign = 0; dot = 0; dwidth = 0; upper = 0; + cflag = 0; hflag = 0; jflag = 0; tflag = 0; zflag = 0; +reswitch: switch (ch = (u_char)*fmt++) { + case '.': + dot = 1; + goto reswitch; + case '#': + sharpflag = 1; + goto reswitch; + case '+': + sign = 1; + goto reswitch; + case '-': + ladjust = 1; + goto reswitch; + case '%': + PCHAR(ch); + break; + case '*': + if (!dot) { + width = va_arg(ap, int); + if (width < 0) { + ladjust = !ladjust; + width = -width; + } + } else { + dwidth = va_arg(ap, int); + } + goto reswitch; + case '0': + if (!dot) { + padc = '0'; + goto reswitch; + } + /* fallthrough */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + for (n = 0;; ++fmt) { + n = n * 10 + ch - '0'; + ch = *fmt; + if (ch < '0' || ch > '9') + break; + } + if (dot) + dwidth = n; + else + width = n; + goto reswitch; + case 'c': + PCHAR(va_arg(ap, int)); + break; + case 'd': + case 'i': + base = 10; + sign = 1; + goto handle_sign; + case 'h': + if (hflag) { + hflag = 0; + cflag = 1; + } else + hflag = 1; + goto reswitch; + case 'j': + jflag = 1; + goto reswitch; + case 'l': + if (lflag) { + lflag = 0; + qflag = 1; + } else + lflag = 1; + goto reswitch; + case 'n': + if (jflag) + *(va_arg(ap, intmax_t *)) = retval; + else if (qflag) + *(va_arg(ap, int64_t *)) = retval; + else if (lflag) + *(va_arg(ap, long *)) = retval; + else if (zflag) + *(va_arg(ap, size_t *)) = retval; + else if (hflag) + *(va_arg(ap, short *)) = retval; + else if (cflag) + *(va_arg(ap, char *)) 
= retval; + else + *(va_arg(ap, int *)) = retval; + break; + case 'o': + base = 8; + goto handle_nosign; + case 'p': + base = 16; + sharpflag = (width == 0); + sign = 0; + num = (uintptr_t)va_arg(ap, void *); + goto number; + case 'q': + qflag = 1; + goto reswitch; + case 'r': + base = 10; + if (sign) + goto handle_sign; + goto handle_nosign; + case 's': + p = va_arg(ap, char *); + if (p == NULL) + p = "(null)"; + if (!dot) + n = strlen (p); + else + for (n = 0; n < dwidth && p[n]; n++) + continue; + + width -= n; + + if (!ladjust && width > 0) + while (width--) + PCHAR(padc); + while (n--) + PCHAR(*p++); + if (ladjust && width > 0) + while (width--) + PCHAR(padc); + break; + case 't': + tflag = 1; + goto reswitch; + case 'u': + base = 10; + goto handle_nosign; + case 'X': + upper = 1; + case 'x': + base = 16; + goto handle_nosign; + case 'y': + base = 16; + sign = 1; + goto handle_sign; + case 'z': + zflag = 1; + goto reswitch; +handle_nosign: + sign = 0; + if (jflag) + num = va_arg(ap, uintmax_t); + else if (qflag) + num = va_arg(ap, uint64_t); + else if (tflag) + num = va_arg(ap, ptrdiff_t); + else if (lflag) + num = va_arg(ap, u_long); + else if (zflag) + num = va_arg(ap, size_t); + else if (hflag) + num = (unsigned short)va_arg(ap, int); + else if (cflag) + num = (u_char)va_arg(ap, int); + else + num = va_arg(ap, u_int); + goto number; +handle_sign: + if (jflag) + num = va_arg(ap, intmax_t); + else if (qflag) + num = va_arg(ap, int64_t); + else if (tflag) + num = va_arg(ap, ptrdiff_t); + else if (lflag) + num = va_arg(ap, long); + else if (zflag) + num = va_arg(ap, ssize_t); + else if (hflag) + num = (short)va_arg(ap, int); + else if (cflag) + num = (char)va_arg(ap, int); + else + num = va_arg(ap, int); +number: + if (sign && (intmax_t)num < 0) { + neg = 1; + num = -(intmax_t)num; + } + p = ksprintn(nbuf, num, base, &n, upper); + tmp = 0; + if (sharpflag && num != 0) { + if (base == 8) + tmp++; + else if (base == 16) + tmp += 2; + } + if (neg) + tmp++; + + if (!ladjust && padc == '0') + dwidth = width - tmp; + width -= tmp + (dwidth > n ? dwidth : n); + dwidth -= n; + if (!ladjust) + while (width-- > 0) + PCHAR(' '); + if (neg) + PCHAR('-'); + if (sharpflag && num != 0) { + if (base == 8) { + PCHAR('0'); + } else if (base == 16) { + PCHAR('0'); + PCHAR('x'); + } + } + while (dwidth-- > 0) + PCHAR('0'); + + while (*p) + PCHAR(*p--); + + if (ladjust) + while (width-- > 0) + PCHAR(' '); + + break; + default: + while (percent < fmt) + PCHAR(*percent++); + /* + * Since we ignore a formatting argument it is no + * longer safe to obey the remaining formatting + * arguments as the arguments will no longer match + * the format specs. + */ + stop = 1; + break; + } + } +#undef PCHAR +} + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} + +/** + * vsprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @args: Arguments for the format string + * + * Call this function if you are already dealing with a va_list. + * You probably want sprintf instead. 
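The PCHAR macro above gives this vsnprintf the C99 contract: at most size-1 characters are stored, a terminating NUL is written whenever size >= 1, and the return value is the length the full, untruncated output would have had. The host C library behaves the same way, so the semantics can be checked standalone:

    #include <stdio.h>

    int main(void)
    {
        char buf[8];
        int n = snprintf(buf, sizeof(buf), "event channel %d", 42);

        /* Only 7 characters fit; the full output would be 16 long. */
        printf("buf=\"%s\" ret=%d\n", buf, n);  /* buf="event c" ret=16 */
        return 0;
    }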
+ */ +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args); +} + + +/** + * sprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +/* + * Fill in the given table from the scanset at the given format + * (just after `['). Return a pointer to the character past the + * closing `]'. The table has a 1 wherever characters should be + * considered part of the scanset. + */ +static const u_char * +__sccl(char *tab, const u_char *fmt) +{ + int c, n, v; + + /* first `clear' the whole table */ + c = *fmt++; /* first char hat => negated scanset */ + if (c == '^') { + v = 1; /* default => accept */ + c = *fmt++; /* get new first char */ + } else + v = 0; /* default => reject */ + + /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ + for (n = 0; n < 256; n++) + tab[n] = v; /* memset(tab, v, 256) */ + + if (c == 0) + return (fmt - 1);/* format ended before closing ] */ + + /* + * Now set the entries corresponding to the actual scanset + * to the opposite of the above. + * + * The first character may be ']' (or '-') without being special; + * the last character may be '-'. + */ + v = 1 - v; + for (;;) { + tab[c] = v; /* take character c */ +doswitch: + n = *fmt++; /* and examine the next */ + switch (n) { + + case 0: /* format ended too soon */ + return (fmt - 1); + + case '-': + /* + * A scanset of the form + * [01+-] + * is defined as `the digit 0, the digit 1, + * the character +, the character -', but + * the effect of a scanset such as + * [a-zA-Z0-9] + * is implementation defined. The V7 Unix + * scanf treats `a-z' as `the letters a through + * z', but treats `a-a' as `the letter a, the + * character -, and the letter a'. + * + * For compatibility, the `-' is not considerd + * to define a range if the character following + * it is either a close bracket (required by ANSI) + * or is not numerically greater than the character + * we just stored in the table (c). + */ + n = *fmt; + if (n == ']' || n < c) { + c = '-'; + break; /* resume the for(;;) */ + } + fmt++; + /* fill in the range */ + do { + tab[++c] = v; + } while (c < n); + c = n; + /* + * Alas, the V7 Unix scanf also treats formats + * such as [a-c-e] as `the letters a through e'. + * This too is permitted by the standard.... + */ + goto doswitch; + break; + + case ']': /* end of scanset */ + return (fmt); + + default: /* just another character */ + c = n; + break; + } + } + /* NOTREACHED */ +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +#define BUF 32 /* Maximum length of numeric string. */ + +/* + * Flags used during conversion. + */ +#define LONG 0x01 /* l: long or double */ +#define SHORT 0x04 /* h: short */ +#define SUPPRESS 0x08 /* suppress assignment */ +#define POINTER 0x10 /* weird %p pointer (`fake hex') */ +#define NOSKIP 0x20 /* do not skip blanks */ +#define QUAD 0x400 +#define SHORTSHORT 0x4000 /** hh: char */ + +/* + * The following are used in numeric conversions only: + * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; + * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 
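__sccl builds the 256-entry acceptance table that drives %[...] conversions, including '-' ranges and leading-'^' negation. A host sscanf implements the same rules, so a standalone sketch can show what such a table accepts; the input string is made up:

    #include <stdio.h>

    int main(void)
    {
        char word[16], rest[16];

        /* %[a-z/] accepts a range plus '/'; %[^\n] is a negated set
         * that accepts anything except a newline (and, unlike %s,
         * does not skip leading whitespace). */
        if (sscanf("backend/vbd 768", "%15[a-z/]%15[^\n]", word, rest) == 2)
            printf("word=\"%s\" rest=\"%s\"\n", word, rest);
        return 0;
    }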
+ */ +#define SIGNOK 0x40 /* +/- is (still) legal */ +#define NDIGITS 0x80 /* no digits detected */ + +#define DPTOK 0x100 /* (float) decimal point is still legal */ +#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ + +#define PFXOK 0x100 /* 0x prefix is (still) legal */ +#define NZDIGITS 0x200 /* no zero digits detected */ + +/* + * Conversion types. + */ +#define CT_CHAR 0 /* %c conversion */ +#define CT_CCL 1 /* %[...] conversion */ +#define CT_STRING 2 /* %s conversion */ +#define CT_INT 3 /* integer, i.e., strtoq or strtouq */ +typedef uint64_t (*ccfntype)(const char *, char **, int); + +int +vsscanf(const char *inp, char const *fmt0, va_list ap) +{ + int inr; + const u_char *fmt = (const u_char *)fmt0; + int c; /* character from format, or conversion */ + size_t width; /* field width, or 0 */ + char *p; /* points into all kinds of strings */ + int n; /* handy integer */ + int flags; /* flags as defined above */ + char *p0; /* saves original value of p when necessary */ + int nassigned; /* number of fields assigned */ + int nconversions; /* number of conversions */ + int nread; /* number of characters consumed from fp */ + int base; /* base argument to strtoq/strtouq */ + ccfntype ccfn; /* conversion function (strtoq/strtouq) */ + char ccltab[256]; /* character class table for %[...] */ + char buf[BUF]; /* buffer for numeric conversions */ + + /* `basefix' is used to avoid `if' tests in the integer scanner */ + static short basefix[17] = + { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + + inr = strlen(inp); + + nassigned = 0; + nconversions = 0; + nread = 0; + base = 0; /* XXX just to keep gcc happy */ + ccfn = NULL; /* XXX just to keep gcc happy */ + for (;;) { + c = *fmt++; + if (c == 0) + return (nassigned); + if (isspace(c)) { + while (inr > 0 && isspace(*inp)) + nread++, inr--, inp++; + continue; + } + if (c != '%') + goto literal; + width = 0; + flags = 0; + /* + * switch on the format. continue if done; + * break once format type is derived. + */ +again: c = *fmt++; + switch (c) { + case '%': +literal: + if (inr <= 0) + goto input_failure; + if (*inp != c) + goto match_failure; + inr--, inp++; + nread++; + continue; + + case '*': + flags |= SUPPRESS; + goto again; + case 'l': + if (flags & LONG){ + flags &= ~LONG; + flags |= QUAD; + } else { + flags |= LONG; + } + goto again; + case 'q': + flags |= QUAD; + goto again; + case 'h': + if (flags & SHORT){ + flags &= ~SHORT; + flags |= SHORTSHORT; + } else { + flags |= SHORT; + } + goto again; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + width = width * 10 + c - '0'; + goto again; + + /* + * Conversions. + * + */ + case 'd': + c = CT_INT; + ccfn = (ccfntype)strtoq; + base = 10; + break; + + case 'i': + c = CT_INT; + ccfn = (ccfntype)strtoq; + base = 0; + break; + + case 'o': + c = CT_INT; + ccfn = strtouq; + base = 8; + break; + + case 'u': + c = CT_INT; + ccfn = strtouq; + base = 10; + break; + + case 'x': + flags |= PFXOK; /* enable 0x prefixing */ + c = CT_INT; + ccfn = strtouq; + base = 16; + break; + + case 's': + c = CT_STRING; + break; + + case '[': + fmt = __sccl(ccltab, fmt); + flags |= NOSKIP; + c = CT_CCL; + break; + + case 'c': + flags |= NOSKIP; + c = CT_CHAR; + break; + + case 'p': /* pointer format is like hex */ + flags |= POINTER | PFXOK; + c = CT_INT; + ccfn = strtouq; + base = 16; + break; + + case 'n': + nconversions++; + if (flags & SUPPRESS) /* ??? 
*/ + continue; + if (flags & SHORTSHORT) + *va_arg(ap, char *) = nread; + else if (flags & SHORT) + *va_arg(ap, short *) = nread; + else if (flags & LONG) + *va_arg(ap, long *) = nread; + else if (flags & QUAD) + *va_arg(ap, int64_t *) = nread; + else + *va_arg(ap, int *) = nread; + continue; + } + + /* + * We have a conversion that requires input. + */ + if (inr <= 0) + goto input_failure; + + /* + * Consume leading white space, except for formats + * that suppress this. + */ + if ((flags & NOSKIP) == 0) { + while (isspace(*inp)) { + nread++; + if (--inr > 0) + inp++; + else + goto input_failure; + } + /* + * Note that there is at least one character in + * the buffer, so conversions that do not set NOSKIP + * can no longer result in an input failure. + */ + } + + /* + * Do the conversion. + */ + switch (c) { + + case CT_CHAR: + /* scan arbitrary characters (sets NOSKIP) */ + if (width == 0) + width = 1; + if (flags & SUPPRESS) { + size_t sum = 0; + if ((n = inr) < width) { + sum += n; + width -= n; + inp += n; + if (sum == 0) + goto input_failure; + } else { + sum += width; + inr -= width; + inp += width; + } + nread += sum; + } else { + memcpy(va_arg(ap, char *), inp, width); + inr -= width; + inp += width; + nread += width; + nassigned++; + } + nconversions++; + break; + + case CT_CCL: + /* scan a (nonempty) character class (sets NOSKIP) */ + if (width == 0) + width = (size_t)~0; /* `infinity' */ + /* take only those things in the class */ + if (flags & SUPPRESS) { + n = 0; + while (ccltab[(unsigned char)*inp]) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (n == 0) + goto input_failure; + break; + } + } + if (n == 0) + goto match_failure; + } else { + p0 = p = va_arg(ap, char *); + while (ccltab[(unsigned char)*inp]) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (p == p0) + goto input_failure; + break; + } + } + n = p - p0; + if (n == 0) + goto match_failure; + *p = 0; + nassigned++; + } + nread += n; + nconversions++; + break; + + case CT_STRING: + /* like CCL, but zero-length string OK, & no NOSKIP */ + if (width == 0) + width = (size_t)~0; + if (flags & SUPPRESS) { + n = 0; + while (!isspace(*inp)) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + nread += n; + } else { + p0 = p = va_arg(ap, char *); + while (!isspace(*inp)) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + *p = 0; + nread += p - p0; + nassigned++; + } + nconversions++; + continue; + + case CT_INT: + /* scan an integer as if by strtoq/strtouq */ +#ifdef hardway + if (width == 0 || width > sizeof(buf) - 1) + width = sizeof(buf) - 1; +#else + /* size_t is unsigned, hence this optimisation */ + if (--width > sizeof(buf) - 2) + width = sizeof(buf) - 2; + width++; +#endif + flags |= SIGNOK | NDIGITS | NZDIGITS; + for (p = buf; width; width--) { + c = *inp; + /* + * Switch on the character; `goto ok' + * if we accept it as a part of number. + */ + switch (c) { + + /* + * The digit 0 is always legal, but is + * special. For %i conversions, if no + * digits (zero or nonzero) have been + * scanned (only signs), we will have + * base==0. In that case, we should set + * it to 8 and enable 0x prefixing. + * Also, if we have not scanned zero digits + * before this, do not turn off prefixing + * (someone else will turn it off if we + * have scanned any nonzero digits). 
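 *
 * Concretely, for %i (base == 0) this yields strtol-style base
 * detection:
 *     sscanf("0x1f", "%i", &v)  ->  v == 31  (0x prefix selects hex)
 *     sscanf("017",  "%i", &v)  ->  v == 15  (leading 0 selects octal)
 *     sscanf("17",   "%i", &v)  ->  v == 17  (plain decimal)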
+ */ + case '0': + if (base == 0) { + base = 8; + flags |= PFXOK; + } + if (flags & NZDIGITS) + flags &= ~(SIGNOK|NZDIGITS|NDIGITS); + else + flags &= ~(SIGNOK|PFXOK|NDIGITS); + goto ok; + + /* 1 through 7 always legal */ + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + base = basefix[base]; + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* digits 8 and 9 ok iff decimal or hex */ + case '8': case '9': + base = basefix[base]; + if (base <= 8) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* letters ok iff hex */ + case 'A': case 'B': case 'C': + case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': + case 'd': case 'e': case 'f': + /* no need to fix base here */ + if (base <= 10) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* sign ok only as first character */ + case '+': case '-': + if (flags & SIGNOK) { + flags &= ~SIGNOK; + goto ok; + } + break; + + /* x ok iff flag still set & 2nd char */ + case 'x': case 'X': + if (flags & PFXOK && p == buf + 1) { + base = 16; /* if %i */ + flags &= ~PFXOK; + goto ok; + } + break; + } + + /* + * If we got here, c is not a legal character + * for a number. Stop accumulating digits. + */ + break; + ok: + /* + * c is legal: store it and look at the next. + */ + *p++ = c; + if (--inr > 0) + inp++; + else + break; /* end of input */ + } + /* + * If we had only a sign, it is no good; push + * back the sign. If the number ends in `x', + * it was [sign] '' 'x', so push back the x + * and treat it as [sign] ''. + */ + if (flags & NDIGITS) { + if (p > buf) { + inp--; + inr++; + } + goto match_failure; + } + c = ((u_char *)p)[-1]; + if (c == 'x' || c == 'X') { + --p; + inp--; + inr++; + } + if ((flags & SUPPRESS) == 0) { + uint64_t res; + + *p = 0; + res = (*ccfn)(buf, (char **)NULL, base); + if (flags & POINTER) + *va_arg(ap, void **) = + (void *)(uintptr_t)res; + else if (flags & SHORTSHORT) + *va_arg(ap, char *) = res; + else if (flags & SHORT) + *va_arg(ap, short *) = res; + else if (flags & LONG) + *va_arg(ap, long *) = res; + else if (flags & QUAD) + *va_arg(ap, int64_t *) = res; + else + *va_arg(ap, int *) = res; + nassigned++; + } + nread += p - buf; + nconversions++; + break; + + } + } +input_failure: + return (nconversions != 0 ? nassigned : -1); +match_failure: + return (nassigned); +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char * buf, const char * fmt, ...) 
+{ + va_list args; + int i; + + va_start(args,fmt); + i = vsscanf(buf,fmt,args); + va_end(args); + return i; +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/lib/stack_chk_fail.c xen-4.9.2/extras/mini-os/lib/stack_chk_fail.c --- xen-4.9.0/extras/mini-os/lib/stack_chk_fail.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/stack_chk_fail.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,8 @@ +#include +#include + +void __stack_chk_fail(void) +{ + printk("stack smashing detected\n"); + do_exit(); +} diff -Nru xen-4.9.0/extras/mini-os/lib/string.c xen-4.9.2/extras/mini-os/lib/string.c --- xen-4.9.0/extras/mini-os/lib/string.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/string.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,228 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: string.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library function for string and memory manipulation + * Origin unknown + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include + +/* newlib defines ffs but not ffsll or ffsl */ +int __ffsti2 (long long int lli) +{ + int i, num, t, tmpint, len; + + num = sizeof(long long int) / sizeof(int); + if (num == 1) return (ffs((int) lli)); + len = sizeof(int) * 8; + + for (i = 0; i < num; i++) { + tmpint = (int) (((lli >> len) << len) ^ lli); + + t = ffs(tmpint); + if (t) + return (t + i * len); + lli = lli >> len; + } + return 0; +} + +int __ffsdi2 (long int li) +{ + return __ffsti2 ((long long int) li); +} + +int ffsl (long int li) +{ + return __ffsti2 ((long long int) li); +} + +int ffsll (long long int lli) +{ + return __ffsti2 (lli); +} + +#if !defined HAVE_LIBC + +#include +#include +#include +#include + +int memcmp(const void * cs,const void * ct,size_t count) +{ + const unsigned char *su1, *su2; + signed char res = 0; + + for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +void * memcpy(void * dest,const void *src,size_t count) +{ + char *tmp = (char *) dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + + return dest; +} + +int strncmp(const char * cs,const char * ct,size_t count) +{ + register signed char __res = 0; + + while (count) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + count--; + } + + return __res; +} + +int strcmp(const char * cs,const char * ct) +{ + register signed char __res; + + while (1) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + } + + return __res; +} + +char * strcpy(char * dest,const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +char * strncpy(char * dest,const char *src,size_t count) +{ + char *tmp = dest; + + while (count-- && (*dest++ = *src++) != '\0') + /* nothing */; + + return tmp; +} + +void * memset(void * s,int c,size_t count) +{ + char *xs = (char *) s; + + while (count--) + *xs++ = c; + + return s; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* 
nothing */; + return sc - s; +} + + +char * strcat(char * dest, const char * src) +{ + char *tmp = dest; + + while (*dest) + dest++; + + while ((*dest++ = *src++) != '\0'); + + return tmp; +} + +size_t strlen(const char * s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * strchr(const char * s, int c) +{ + for(; *s != (char) c; ++s) + if (*s == '\0') + return NULL; + return (char *)s; +} + +char * strrchr(const char * s, int c) +{ + const char *res = NULL; + for(; *s != '\0'; ++s) + if (*s == (char) c) + res = s; + return (char *)res; +} + +char * strstr(const char * s1,const char * s2) +{ + int l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *) s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1,s2,l2)) + return (char *) s1; + s1++; + } + return NULL; +} + +char *strdup(const char *x) +{ + int l = strlen(x); + char *res = malloc(l + 1); + if (!res) return NULL; + memcpy(res, x, l + 1); + return res; +} + +int ffs(int i) +{ + int c = 1; + + do { + if (i & 1) + return (c); + i = i >> 1; + c++; + } while (i); + return 0; +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/lib/sys.c xen-4.9.2/extras/mini-os/lib/sys.c --- xen-4.9.0/extras/mini-os/lib/sys.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/sys.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1552 @@ +/* + * POSIX-compatible libc layer + * + * Samuel Thibault , October 2007 + * + * Provides the UNIXish part of the standard libc function. + * + * Relatively straight-forward: just multiplex the file descriptor operations + * among the various file types (console, FS, network, ...) + */ + +//#define LIBC_VERBOSE +//#define LIBC_DEBUG + +#ifdef LIBC_DEBUG +#define DEBUG(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define DEBUG(fmt,...) +#endif + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LWIP +#include +#endif + +#define debug(fmt, ...) \ + +#define print_unsupported(fmt, ...) 
\ + printk("Unsupported function "fmt" called in Mini-OS kernel\n", ## __VA_ARGS__); + +/* Crash on function call */ +#define unsupported_function_crash(function) \ + int __unsup_##function(void) asm(#function); \ + int __unsup_##function(void) \ + { \ + print_unsupported(#function); \ + do_exit(); \ + } + +/* Log and err out on function call */ +#define unsupported_function_log(type, function, ret) \ + type __unsup_##function(void) asm(#function); \ + type __unsup_##function(void) \ + { \ + print_unsupported(#function); \ + errno = ENOSYS; \ + return ret; \ + } + +/* Err out on function call */ +#define unsupported_function(type, function, ret) \ + type __unsup_##function(void) asm(#function); \ + type __unsup_##function(void) \ + { \ + errno = ENOSYS; \ + return ret; \ + } + +#define NOFILE 32 +extern void minios_interface_close_fd(int fd); +extern void minios_evtchn_close_fd(int fd); +extern void minios_gnttab_close_fd(int fd); + +pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER; +struct file files[NOFILE] = { + { .type = FTYPE_CONSOLE }, /* stdin */ + { .type = FTYPE_CONSOLE }, /* stdout */ + { .type = FTYPE_CONSOLE }, /* stderr */ +}; + +DECLARE_WAIT_QUEUE_HEAD(event_queue); + +int alloc_fd(enum fd_type type) +{ + int i; + pthread_mutex_lock(&fd_lock); + for (i=0; i 0; i--) + if (files[i].type != FTYPE_NONE) + close(i); + pthread_mutex_unlock(&fd_lock); +} + +int dup2(int oldfd, int newfd) +{ + pthread_mutex_lock(&fd_lock); + if (files[newfd].type != FTYPE_NONE) + close(newfd); + // XXX: this is a bit bogus, as we are supposed to share the offset etc + files[newfd] = files[oldfd]; + pthread_mutex_unlock(&fd_lock); + return 0; +} + +pid_t getpid(void) +{ + return 1; +} + +pid_t getppid(void) +{ + return 1; +} + +pid_t setsid(void) +{ + return 1; +} + +char *getcwd(char *buf, size_t size) +{ + snprintf(buf, size, "/"); + return buf; +} + +#define LOG_PATH "/var/log/" +#define SAVE_PATH "/var/lib/xen" +#define SAVE_CONSOLE 1 +#define RESTORE_CONSOLE 2 + +int mkdir(const char *pathname, mode_t mode) +{ + errno = EIO; + return -1; +} + +#ifdef CONFIG_CONSFRONT +int posix_openpt(int flags) +{ + struct consfront_dev *dev; + + /* Ignore flags */ + + dev = init_consfront(NULL); + dev->fd = alloc_fd(FTYPE_CONSOLE); + files[dev->fd].cons.dev = dev; + + printk("fd(%d) = posix_openpt\n", dev->fd); + return(dev->fd); +} + +int open_savefile(const char *path, int save) +{ + struct consfront_dev *dev; + char nodename[64]; + + snprintf(nodename, sizeof(nodename), "device/console/%d", save ? SAVE_CONSOLE : RESTORE_CONSOLE); + + dev = init_consfront(nodename); + dev->fd = alloc_fd(FTYPE_SAVEFILE); + files[dev->fd].cons.dev = dev; + + printk("fd(%d) = open_savefile\n", dev->fd); + return(dev->fd); +} +#else +int posix_openpt(int flags) +{ + errno = EIO; + return -1; +} +int open_savefile(const char *path, int save) +{ + errno = EIO; + return -1; +} +#endif + +int open(const char *pathname, int flags, ...) +{ + int fd; + /* Ugly, but fine. 
*/ + if (!strncmp(pathname,LOG_PATH,strlen(LOG_PATH))) { + fd = alloc_fd(FTYPE_CONSOLE); + printk("open(%s) -> %d\n", pathname, fd); + return fd; + } + if (!strncmp(pathname, "/dev/mem", strlen("/dev/mem"))) { + fd = alloc_fd(FTYPE_MEM); + printk("open(/dev/mem) -> %d\n", fd); + return fd; + } + if (!strncmp(pathname, "/dev/ptmx", strlen("/dev/ptmx"))) + return posix_openpt(flags); + if (!strncmp(pathname,SAVE_PATH,strlen(SAVE_PATH))) + return open_savefile(pathname, flags & O_WRONLY); + errno = EIO; + return -1; +} + +int isatty(int fd) +{ + return files[fd].type == FTYPE_CONSOLE; +} + +int read(int fd, void *buf, size_t nbytes) +{ + switch (files[fd].type) { + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: { + int ret; + DEFINE_WAIT(w); + while(1) { + add_waiter(w, console_queue); + ret = xencons_ring_recv(files[fd].cons.dev, buf, nbytes); + if (ret) + break; + schedule(); + } + remove_waiter(w, console_queue); + return ret; + } +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + return lwip_read(files[fd].socket.fd, buf, nbytes); +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: { + ssize_t ret; + ret = netfront_receive(files[fd].tap.dev, buf, nbytes); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret; + } +#endif +#ifdef CONFIG_KBDFRONT + case FTYPE_KBD: { + int ret, n; + n = nbytes / sizeof(union xenkbd_in_event); + ret = kbdfront_receive(files[fd].kbd.dev, buf, n); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret * sizeof(union xenkbd_in_event); + } +#endif +#ifdef CONFIG_FBFRONT + case FTYPE_FB: { + int ret, n; + n = nbytes / sizeof(union xenfb_in_event); + ret = fbfront_receive(files[fd].fb.dev, buf, n); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret * sizeof(union xenfb_in_event); + } +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: { + return blkfront_posix_read(fd, buf, nbytes); + } +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: { + return tpmfront_posix_read(fd, buf, nbytes); + } +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: { + return tpm_tis_posix_read(fd, buf, nbytes); + } +#endif + default: + break; + } + printk("read(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +int write(int fd, const void *buf, size_t nbytes) +{ + switch (files[fd].type) { + case FTYPE_SAVEFILE: { + int ret = 0, tot = nbytes; + while (nbytes > 0) { + ret = xencons_ring_send(files[fd].cons.dev, (char *)buf, nbytes); + nbytes -= ret; + buf = (char *)buf + ret; + } + return tot - nbytes; + } + case FTYPE_CONSOLE: + console_print(files[fd].cons.dev, (char *)buf, nbytes); + return nbytes; +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + return lwip_write(files[fd].socket.fd, (void*) buf, nbytes); +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: + netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); + return nbytes; +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_write(fd, buf, nbytes); +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + return tpmfront_posix_write(fd, buf, nbytes); +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + return tpm_tis_posix_write(fd, buf, nbytes); +#endif + default: + break; + } + printk("write(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +off_t lseek(int fd, off_t offset, int whence) +{ + off_t* target = NULL; + switch(files[fd].type) { +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + target = &files[fd].blk.offset; + break; +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + target = &files[fd].tpmfront.offset; + break; +#endif +#ifdef CONFIG_TPM_TIS + case 
FTYPE_TPM_TIS: + target = &files[fd].tpm_tis.offset; + break; +#endif + case FTYPE_FILE: + target = &files[fd].file.offset; + break; + default: + /* Not implemented for this filetype */ + errno = ESPIPE; + return (off_t) -1; + } + + switch (whence) { + case SEEK_SET: + *target = offset; + break; + case SEEK_CUR: + *target += offset; + break; + case SEEK_END: + { + struct stat st; + int ret; + ret = fstat(fd, &st); + if (ret) + return -1; + *target = st.st_size + offset; + break; + } + default: + errno = EINVAL; + return -1; + } + return *target; +} + +int fsync(int fd) { + errno = EBADF; + return -1; +} + +int close(int fd) +{ + printk("close(%d)\n", fd); + switch (files[fd].type) { + default: + files[fd].type = FTYPE_NONE; + return 0; +#ifdef CONFIG_XENBUS + case FTYPE_XENBUS: + xs_daemon_close((void*)(intptr_t) fd); + return 0; +#endif +#ifdef HAVE_LWIP + case FTYPE_SOCKET: { + int res = lwip_close(files[fd].socket.fd); + files[fd].type = FTYPE_NONE; + return res; + } +#endif +#ifdef CONFIG_XC + case FTYPE_XC: + minios_interface_close_fd(fd); + return 0; + case FTYPE_EVTCHN: + minios_evtchn_close_fd(fd); + return 0; + case FTYPE_GNTMAP: + minios_gnttab_close_fd(fd); + return 0; +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: + shutdown_netfront(files[fd].tap.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + shutdown_blkfront(files[fd].blk.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + shutdown_tpmfront(files[fd].tpmfront.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + shutdown_tpm_tis(files[fd].tpm_tis.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_KBDFRONT + case FTYPE_KBD: + shutdown_kbdfront(files[fd].kbd.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_FBFRONT + case FTYPE_FB: + shutdown_fbfront(files[fd].fb.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_CONSFRONT + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: + fini_console(files[fd].cons.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif + case FTYPE_NONE: + break; + } + printk("close(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +static void init_stat(struct stat *buf) +{ + memset(buf, 0, sizeof(*buf)); + buf->st_dev = 0; + buf->st_ino = 0; + buf->st_nlink = 1; + buf->st_rdev = 0; + buf->st_blksize = 4096; + buf->st_blocks = 0; +} + +int stat(const char *path, struct stat *buf) +{ + errno = EIO; + return -1; +} + +int fstat(int fd, struct stat *buf) +{ + init_stat(buf); + switch (files[fd].type) { + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: + case FTYPE_SOCKET: { + if (files[fd].type == FTYPE_CONSOLE) + buf->st_mode = S_IFCHR|S_IRUSR|S_IWUSR; + else if (files[fd].type == FTYPE_SOCKET) + buf->st_mode = S_IFSOCK|S_IRUSR|S_IWUSR; + else if (files[fd].type == FTYPE_SAVEFILE) + buf->st_mode = S_IFREG|S_IRUSR|S_IWUSR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = 0; + buf->st_atime = + buf->st_mtime = + buf->st_ctime = time(NULL); + return 0; + } +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_fstat(fd, buf); +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + return tpmfront_posix_fstat(fd, buf); +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + return tpm_tis_posix_fstat(fd, buf); +#endif + default: + break; + } + + printk("statf(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +int ftruncate(int fd, off_t length) +{ + errno 
= EBADF; + return -1; +} + +int remove(const char *pathname) +{ + errno = EIO; + return -1; +} + +int unlink(const char *pathname) +{ + return remove(pathname); +} + +int rmdir(const char *pathname) +{ + return remove(pathname); +} + +int fcntl(int fd, int cmd, ...) +{ + long arg; + va_list ap; + va_start(ap, cmd); + arg = va_arg(ap, long); + va_end(ap); + + switch (cmd) { +#ifdef HAVE_LWIP + case F_SETFL: + if (files[fd].type == FTYPE_SOCKET && !(arg & ~O_NONBLOCK)) { + /* Only flag supported: non-blocking mode */ + uint32_t nblock = !!(arg & O_NONBLOCK); + return lwip_ioctl(files[fd].socket.fd, FIONBIO, &nblock); + } + /* Fallthrough */ +#endif + default: + printk("fcntl(%d, %d, %lx/%lo)\n", fd, cmd, arg, arg); + errno = ENOSYS; + return -1; + } +} + +DIR *opendir(const char *name) +{ + DIR *ret; + ret = malloc(sizeof(*ret)); + ret->name = strdup(name); + ret->offset = 0; + ret->entries = NULL; + ret->curentry = -1; + ret->nbentries = 0; + ret->has_more = 1; + return ret; +} + +struct dirent *readdir(DIR *dir) +{ + return NULL; +} + +int closedir(DIR *dir) +{ + int i; + for (i=0; inbentries; i++) + free(dir->entries[i]); + free(dir->entries); + free(dir->name); + free(dir); + return 0; +} + +/* We assume that only the main thread calls select(). */ + +#if defined(LIBC_DEBUG) || defined(LIBC_VERBOSE) +static const char file_types[] = { + [FTYPE_NONE] = 'N', + [FTYPE_CONSOLE] = 'C', + [FTYPE_XENBUS] = 'S', + [FTYPE_XC] = 'X', + [FTYPE_EVTCHN] = 'E', + [FTYPE_SOCKET] = 's', + [FTYPE_TAP] = 'T', + [FTYPE_BLK] = 'B', + [FTYPE_KBD] = 'K', + [FTYPE_FB] = 'G', +}; +#endif +#ifdef LIBC_DEBUG +static void dump_set(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) +{ + int i, comma; +#define printfds(set) do {\ + comma = 0; \ + for (i = 0; i < nfds; i++) { \ + if (FD_ISSET(i, set)) { \ + if (comma) \ + printk(", "); \ + printk("%d(%c)", i, file_types[files[i].type]); \ + comma = 1; \ + } \ + } \ +} while (0) + + printk("["); + if (readfds) + printfds(readfds); + printk("], ["); + if (writefds) + printfds(writefds); + printk("], ["); + if (exceptfds) + printfds(exceptfds); + printk("], "); + if (timeout) + printk("{ %ld, %ld }", timeout->tv_sec, timeout->tv_usec); +} +#else +#define dump_set(nfds, readfds, writefds, exceptfds, timeout) +#endif + +#ifdef LIBC_DEBUG +static void dump_pollfds(struct pollfd *pfd, int nfds, int timeout) +{ + int i, comma, fd; + + printk("["); + comma = 0; + for (i = 0; i < nfds; i++) { + fd = pfd[i].fd; + if (comma) + printk(", "); + printk("%d(%c)/%02x", fd, file_types[files[fd].type], + pfd[i].events); + comma = 1; + } + printk("]"); + + printk(", %d, %d", nfds, timeout); +} +#else +#define dump_pollfds(pfds, nfds, timeout) +#endif + +/* Just poll without blocking */ +static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds) +{ + int i, n = 0; +#ifdef HAVE_LWIP + int sock_n = 0, sock_nfds = 0; + fd_set sock_readfds, sock_writefds, sock_exceptfds; + struct timeval timeout = { .tv_sec = 0, .tv_usec = 0}; +#endif + +#ifdef LIBC_VERBOSE + static int nb; + static int nbread[NOFILE], nbwrite[NOFILE], nbexcept[NOFILE]; + static s_time_t lastshown; + + nb++; +#endif + +#ifdef HAVE_LWIP + /* first poll network */ + FD_ZERO(&sock_readfds); + FD_ZERO(&sock_writefds); + FD_ZERO(&sock_exceptfds); + for (i = 0; i < nfds; i++) { + if (files[i].type == FTYPE_SOCKET) { + if (FD_ISSET(i, readfds)) { + FD_SET(files[i].socket.fd, &sock_readfds); + sock_nfds = i+1; + } + if (FD_ISSET(i, writefds)) { + 
FD_SET(files[i].socket.fd, &sock_writefds); + sock_nfds = i+1; + } + if (FD_ISSET(i, exceptfds)) { + FD_SET(files[i].socket.fd, &sock_exceptfds); + sock_nfds = i+1; + } + } + } + if (sock_nfds > 0) { + DEBUG("lwip_select("); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("); -> "); + sock_n = lwip_select(sock_nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("\n"); + } +#endif + + /* Then see others as well. */ + for (i = 0; i < nfds; i++) { + switch(files[i].type) { + default: + if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds)) + printk("bogus fd %d in select\n", i); + /* Fallthrough. */ + case FTYPE_CONSOLE: + if (FD_ISSET(i, readfds)) { + if (xencons_ring_avail(files[i].cons.dev)) + n++; + else + FD_CLR(i, readfds); + } + if (FD_ISSET(i, writefds)) + n++; + FD_CLR(i, exceptfds); + break; +#ifdef CONFIG_XENBUS + case FTYPE_XENBUS: + if (FD_ISSET(i, readfds)) { + if (files[i].xenbus.events) + n++; + else + FD_CLR(i, readfds); + } + FD_CLR(i, writefds); + FD_CLR(i, exceptfds); + break; +#endif + case FTYPE_EVTCHN: + case FTYPE_TAP: + case FTYPE_BLK: + case FTYPE_KBD: + case FTYPE_FB: + if (FD_ISSET(i, readfds)) { + if (files[i].read) + n++; + else + FD_CLR(i, readfds); + } + FD_CLR(i, writefds); + FD_CLR(i, exceptfds); + break; +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + if (FD_ISSET(i, readfds)) { + /* Optimize no-network-packet case. */ + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_readfds)) + n++; + else + FD_CLR(i, readfds); + } + if (FD_ISSET(i, writefds)) { + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_writefds)) + n++; + else + FD_CLR(i, writefds); + } + if (FD_ISSET(i, exceptfds)) { + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_exceptfds)) + n++; + else + FD_CLR(i, exceptfds); + } + break; +#endif + } +#ifdef LIBC_VERBOSE + if (FD_ISSET(i, readfds)) + nbread[i]++; + if (FD_ISSET(i, writefds)) + nbwrite[i]++; + if (FD_ISSET(i, exceptfds)) + nbexcept[i]++; +#endif + } +#ifdef LIBC_VERBOSE + if (NOW() > lastshown + 1000000000ull) { + lastshown = NOW(); + printk("%lu MB free, ", num_free_pages() / ((1 << 20) / PAGE_SIZE)); + printk("%d(%d): ", nb, sock_n); + for (i = 0; i < nfds; i++) { + if (nbread[i] || nbwrite[i] || nbexcept[i]) + printk(" %d(%c):", i, file_types[files[i].type]); + if (nbread[i]) + printk(" %dR", nbread[i]); + if (nbwrite[i]) + printk(" %dW", nbwrite[i]); + if (nbexcept[i]) + printk(" %dE", nbexcept[i]); + } + printk("\n"); + memset(nbread, 0, sizeof(nbread)); + memset(nbwrite, 0, sizeof(nbwrite)); + memset(nbexcept, 0, sizeof(nbexcept)); + nb = 0; + } +#endif + return n; +} + +/* The strategy is to + * - announce that we will maybe sleep + * - poll a bit ; if successful, return + * - if timeout, return + * - really sleep (except if somebody woke us in the meanwhile) */ +int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, + struct timeval *timeout) +{ + int n, ret; + fd_set myread, mywrite, myexcept; + struct thread *thread = get_current(); + s_time_t start = NOW(), stop; +#ifdef CONFIG_NETFRONT + DEFINE_WAIT(netfront_w); +#endif + DEFINE_WAIT(event_w); +#ifdef CONFIG_BLKFRONT + DEFINE_WAIT(blkfront_w); +#endif +#ifdef CONFIG_XENBUS + DEFINE_WAIT(xenbus_watch_w); +#endif +#ifdef CONFIG_KBDFRONT + DEFINE_WAIT(kbdfront_w); +#endif + DEFINE_WAIT(console_w); + + assert(thread == main_thread); + + DEBUG("select(%d, ", nfds); + dump_set(nfds, readfds, writefds, exceptfds, 
timeout); + DEBUG(");\n"); + + if (timeout) + stop = start + SECONDS(timeout->tv_sec) + timeout->tv_usec * 1000; + else + /* just make gcc happy */ + stop = start; + + /* Tell people we're going to sleep before looking at what they are + * saying, hence letting them wake us if events happen between here and + * schedule() */ +#ifdef CONFIG_NETFRONT + add_waiter(netfront_w, netfront_queue); +#endif + add_waiter(event_w, event_queue); +#ifdef CONFIG_BLKFRONT + add_waiter(blkfront_w, blkfront_queue); +#endif +#ifdef CONFIG_XENBUS + add_waiter(xenbus_watch_w, xenbus_watch_queue); +#endif +#ifdef CONFIG_KBDFRONT + add_waiter(kbdfront_w, kbdfront_queue); +#endif + add_waiter(console_w, console_queue); + + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + + DEBUG("polling "); + dump_set(nfds, &myread, &mywrite, &myexcept, timeout); + DEBUG("\n"); + n = select_poll(nfds, &myread, &mywrite, &myexcept); + + if (n) { + dump_set(nfds, readfds, writefds, exceptfds, timeout); + if (readfds) + *readfds = myread; + if (writefds) + *writefds = mywrite; + if (exceptfds) + *exceptfds = myexcept; + DEBUG(" -> "); + dump_set(nfds, readfds, writefds, exceptfds, timeout); + DEBUG("\n"); + wake(thread); + ret = n; + goto out; + } + if (timeout && NOW() >= stop) { + if (readfds) + FD_ZERO(readfds); + if (writefds) + FD_ZERO(writefds); + if (exceptfds) + FD_ZERO(exceptfds); + timeout->tv_sec = 0; + timeout->tv_usec = 0; + wake(thread); + ret = 0; + goto out; + } + + if (timeout) + thread->wakeup_time = stop; + schedule(); + + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + + n = select_poll(nfds, &myread, &mywrite, &myexcept); + + if (n) { + if (readfds) + *readfds = myread; + if (writefds) + *writefds = mywrite; + if (exceptfds) + *exceptfds = myexcept; + ret = n; + goto out; + } + errno = EINTR; + ret = -1; + +out: +#ifdef CONFIG_NETFRONT + remove_waiter(netfront_w, netfront_queue); +#endif + remove_waiter(event_w, event_queue); +#ifdef CONFIG_BLKFRONT + remove_waiter(blkfront_w, blkfront_queue); +#endif +#ifdef CONFIG_XENBUS + remove_waiter(xenbus_watch_w, xenbus_watch_queue); +#endif +#ifdef CONFIG_KBDFRONT + remove_waiter(kbdfront_w, kbdfront_queue); +#endif + remove_waiter(console_w, console_queue); + return ret; +} + +/* Wrap around select */ +int poll(struct pollfd _pfd[], nfds_t _nfds, int _timeout) +{ + int n, ret; + int i, fd; + struct timeval _timeo, *timeo = NULL; + fd_set rfds, wfds, efds; + int max_fd = -1; + + DEBUG("poll("); + dump_pollfds(_pfd, _nfds, _timeout); + DEBUG(")\n"); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&efds); + + n = 0; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + _pfd[i].revents = 0; + + /* fd < 0, revents = 0, which is already set */ + if (fd < 0) continue; + + /* fd is invalid, revents = POLLNVAL, increment counter */ + if (fd >= NOFILE || files[fd].type == FTYPE_NONE) { + n++; + _pfd[i].revents |= POLLNVAL; + continue; + } + + /* normal case, map POLL* into readfds and writefds: + * POLLIN -> readfds + * POLLOUT -> writefds + * POLL* -> none + */ + if (_pfd[i].events & POLLIN) + FD_SET(fd, &rfds); + if (_pfd[i].events & POLLOUT) + FD_SET(fd, &wfds); + /* always set exceptfds */ + FD_SET(fd, &efds); + if (fd > max_fd) + max_fd = fd; + } + + /* should never sleep 
when we already have events */ + if (n) { + _timeo.tv_sec = 0; + _timeo.tv_usec = 0; + timeo = &_timeo; + } else if (_timeout >= 0) { + /* normal case, construct _timeout, might sleep */ + _timeo.tv_sec = _timeout / 1000; + _timeo.tv_usec = (_timeout % 1000) * 1000; + timeo = &_timeo; + } else { + /* _timeout < 0, block forever */ + timeo = NULL; + } + + + ret = select(max_fd+1, &rfds, &wfds, &efds, timeo); + /* error in select, just return, errno is set by select() */ + if (ret < 0) + return ret; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + + /* the revents has already been set for all error case */ + if (fd < 0 || fd >= NOFILE || files[fd].type == FTYPE_NONE) + continue; + + if (FD_ISSET(fd, &rfds) || FD_ISSET(fd, &wfds) || FD_ISSET(fd, &efds)) + n++; + if (FD_ISSET(fd, &efds)) { + /* anything bad happens we set POLLERR */ + _pfd[i].revents |= POLLERR; + continue; + } + if (FD_ISSET(fd, &rfds)) + _pfd[i].revents |= POLLIN; + if (FD_ISSET(fd, &wfds)) + _pfd[i].revents |= POLLOUT; + } + + return n; +} + +#ifdef HAVE_LWIP +int socket(int domain, int type, int protocol) +{ + int fd, res; + fd = lwip_socket(domain, type, protocol); + if (fd < 0) + return -1; + res = alloc_fd(FTYPE_SOCKET); + printk("socket -> %d\n", res); + files[res].socket.fd = fd; + return res; +} + +int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + int fd, res; + if (files[s].type != FTYPE_SOCKET) { + printk("accept(%d): Bad descriptor\n", s); + errno = EBADF; + return -1; + } + fd = lwip_accept(files[s].socket.fd, addr, addrlen); + if (fd < 0) + return -1; + res = alloc_fd(FTYPE_SOCKET); + files[res].socket.fd = fd; + printk("accepted on %d -> %d\n", s, res); + return res; +} + +#define LWIP_STUB(ret, name, proto, args) \ +ret name proto \ +{ \ + if (files[s].type != FTYPE_SOCKET) { \ + printk(#name "(%d): Bad descriptor\n", s); \ + errno = EBADF; \ + return -1; \ + } \ + s = files[s].socket.fd; \ + return lwip_##name args; \ +} + +LWIP_STUB(int, bind, (int s, struct sockaddr *my_addr, socklen_t addrlen), (s, my_addr, addrlen)) +LWIP_STUB(int, getsockopt, (int s, int level, int optname, void *optval, socklen_t *optlen), (s, level, optname, optval, optlen)) +LWIP_STUB(int, setsockopt, (int s, int level, int optname, void *optval, socklen_t optlen), (s, level, optname, optval, optlen)) +LWIP_STUB(int, connect, (int s, struct sockaddr *serv_addr, socklen_t addrlen), (s, serv_addr, addrlen)) +LWIP_STUB(int, listen, (int s, int backlog), (s, backlog)); +LWIP_STUB(ssize_t, recv, (int s, void *buf, size_t len, int flags), (s, buf, len, flags)) +LWIP_STUB(ssize_t, recvfrom, (int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen), (s, buf, len, flags, from, fromlen)) +LWIP_STUB(ssize_t, send, (int s, void *buf, size_t len, int flags), (s, buf, len, flags)) +LWIP_STUB(ssize_t, sendto, (int s, void *buf, size_t len, int flags, struct sockaddr *to, socklen_t tolen), (s, buf, len, flags, to, tolen)) +LWIP_STUB(int, getsockname, (int s, struct sockaddr *name, socklen_t *namelen), (s, name, namelen)) +#endif + +static char *syslog_ident; +void openlog(const char *ident, int option, int facility) +{ + free(syslog_ident); + syslog_ident = strdup(ident); +} + +void vsyslog(int priority, const char *format, va_list ap) +{ + printk("%s: ", syslog_ident); + print(0, format, ap); +} + +void syslog(int priority, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vsyslog(priority, format, ap); + va_end(ap); +} + +void closelog(void) +{ + free(syslog_ident); + syslog_ident = NULL; +} + +void vwarn(const char *format, va_list ap) +{ + int the_errno = errno; + printk("stubdom: "); + if (format) { + print(0, format, ap); + printk(", "); + } + printk("%s", strerror(the_errno)); +} + +void warn(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vwarn(format, ap); + va_end(ap); +} + +void verr(int eval, const char *format, va_list ap) +{ + vwarn(format, ap); + exit(eval); +} + +void err(int eval, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + verr(eval, format, ap); + va_end(ap); +} + +void vwarnx(const char *format, va_list ap) +{ + printk("stubdom: "); + if (format) + print(0, format, ap); +} + +void warnx(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vwarnx(format, ap); + va_end(ap); +} + +void verrx(int eval, const char *format, va_list ap) +{ + vwarnx(format, ap); + exit(eval); +} + +void errx(int eval, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + verrx(eval, format, ap); + va_end(ap); +} + +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + s_time_t start = NOW(); + s_time_t stop = start + SECONDS(req->tv_sec) + req->tv_nsec; + s_time_t stopped; + struct thread *thread = get_current(); + + thread->wakeup_time = stop; + clear_runnable(thread); + schedule(); + stopped = NOW(); + + if (rem) + { + s_time_t remaining = stop - stopped; + if (remaining > 0) + { + rem->tv_nsec = remaining % 1000000000ULL; + rem->tv_sec = remaining / 1000000000ULL; + } else memset(rem, 0, sizeof(*rem)); + } + + return 0; +} + +int usleep(useconds_t usec) +{ + /* "usec shall be less than one million." 
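 * Hence usec * 1000 stays below 1,000,000,000 and always fits in
 * tv_nsec, with tv_sec left at zero.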
*/ + struct timespec req; + req.tv_nsec = usec * 1000; + req.tv_sec = 0; + + if (nanosleep(&req, NULL)) + return -1; + + return 0; +} + +unsigned int sleep(unsigned int seconds) +{ + struct timespec req, rem; + req.tv_sec = seconds; + req.tv_nsec = 0; + + if (nanosleep(&req, &rem)) + return -1; + + if (rem.tv_nsec > 0) + rem.tv_sec++; + + return rem.tv_sec; +} + +int clock_gettime(clockid_t clk_id, struct timespec *tp) +{ + switch (clk_id) { + case CLOCK_MONOTONIC: + { + struct timeval tv; + + gettimeofday(&tv, NULL); + + tp->tv_sec = tv.tv_sec; + tp->tv_nsec = tv.tv_usec * 1000; + + break; + } + case CLOCK_REALTIME: + { + uint64_t nsec = monotonic_clock(); + + tp->tv_sec = nsec / 1000000000ULL; + tp->tv_nsec = nsec % 1000000000ULL; + + break; + } + default: + print_unsupported("clock_gettime(%ld)", (long) clk_id); + errno = EINVAL; + return -1; + } + + return 0; +} + +uid_t getuid(void) +{ + return 0; +} + +uid_t geteuid(void) +{ + return 0; +} + +gid_t getgid(void) +{ + return 0; +} + +gid_t getegid(void) +{ + return 0; +} + +int gethostname(char *name, size_t namelen) +{ + strncpy(name, "mini-os", namelen); + return 0; +} + +size_t getpagesize(void) +{ + return PAGE_SIZE; +} + +void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE; + + ASSERT(!start); + ASSERT(prot == (PROT_READ|PROT_WRITE)); + ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON))) + || (fd != -1 && flags == MAP_SHARED)); + + if (fd == -1) + return map_zero(n, 1); +#ifdef CONFIG_XC + else if (files[fd].type == FTYPE_XC) { + unsigned long zero = 0; + return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, NULL, 0); + } +#endif + else if (files[fd].type == FTYPE_MEM) { + unsigned long first_mfn = offset >> PAGE_SHIFT; + return map_frames_ex(&first_mfn, n, 0, 1, 1, DOMID_IO, NULL, _PAGE_PRESENT|_PAGE_RW); + } else ASSERT(0); +} + +int munmap(void *start, size_t length) +{ + int total = length / PAGE_SIZE; + int ret; + + ret = unmap_frames((unsigned long)start, (unsigned long)total); + if (ret) { + errno = ret; + return -1; + } + return 0; +} + +void sparse(unsigned long data, size_t size) +{ + unsigned long newdata; + xen_pfn_t *mfns; + int i, n; + + newdata = (data + PAGE_SIZE - 1) & PAGE_MASK; + if (newdata - data > size) + return; + size -= newdata - data; + data = newdata; + n = size / PAGE_SIZE; + size = n * PAGE_SIZE; + + mfns = malloc(n * sizeof(*mfns)); + for (i = 0; i < n; i++) { +#ifdef LIBC_DEBUG + int j; + for (j=0; j> 20, data); + + munmap((void *) data, size); + free_physical_pages(mfns, n); + do_map_zero(data, n); +} + +int nice(int inc) +{ + printk("nice() stub called with inc=%d\n", inc); + return 0; +} + + +/* Not supported by FS yet. */ +unsupported_function_crash(link); +unsupported_function(int, readlink, -1); +unsupported_function_crash(umask); + +/* We could support that. */ +unsupported_function_log(int, chdir, -1); + +/* No dynamic library support. */ +unsupported_function_log(void *, dlopen, NULL); +unsupported_function_log(void *, dlsym, NULL); +unsupported_function_log(char *, dlerror, NULL); +unsupported_function_log(int, dlclose, -1); + +/* We don't raise signals anyway. 
*/ +unsupported_function(int, sigemptyset, -1); +unsupported_function(int, sigfillset, -1); +unsupported_function(int, sigaddset, -1); +unsupported_function(int, sigdelset, -1); +unsupported_function(int, sigismember, -1); +unsupported_function(int, sigprocmask, -1); +unsupported_function(int, sigaction, -1); +unsupported_function(int, __sigsetjmp, 0); +unsupported_function(int, sigaltstack, -1); +unsupported_function_crash(kill); + +/* Unsupported */ +unsupported_function_crash(pipe); +unsupported_function_crash(fork); +unsupported_function_crash(execv); +unsupported_function_crash(execve); +unsupported_function_crash(waitpid); +unsupported_function_crash(wait); +unsupported_function_crash(lockf); +unsupported_function_crash(sysconf); +unsupported_function(int, tcsetattr, -1); +unsupported_function(int, tcgetattr, 0); +unsupported_function(int, grantpt, -1); +unsupported_function(int, unlockpt, -1); +unsupported_function(char *, ptsname, NULL); + +/* net/if.h */ +unsupported_function_log(unsigned int, if_nametoindex, -1); +unsupported_function_log(char *, if_indextoname, (char *) NULL); +unsupported_function_log(struct if_nameindex *, if_nameindex, (struct if_nameindex *) NULL); +unsupported_function_crash(if_freenameindex); + +/* Linuxish abi for the Caml runtime, don't support + Log, and return an error code if possible. If it is not possible + to inform the application of an error, then crash instead! +*/ +unsupported_function_log(struct dirent *, readdir64, NULL); +unsupported_function_log(int, getrusage, -1); +unsupported_function_log(int, getrlimit, -1); +unsupported_function_log(int, getrlimit64, -1); +unsupported_function_log(int, __xstat64, -1); +unsupported_function_log(long, __strtol_internal, LONG_MIN); +unsupported_function_log(double, __strtod_internal, HUGE_VAL); +unsupported_function_log(int, utime, -1); +unsupported_function_log(int, truncate64, -1); +unsupported_function_log(int, tcflow, -1); +unsupported_function_log(int, tcflush, -1); +unsupported_function_log(int, tcdrain, -1); +unsupported_function_log(int, tcsendbreak, -1); +unsupported_function_log(int, cfsetospeed, -1); +unsupported_function_log(int, cfsetispeed, -1); +unsupported_function_crash(cfgetospeed); +unsupported_function_crash(cfgetispeed); +unsupported_function_log(int, symlink, -1); +unsupported_function_log(const char*, inet_ntop, NULL); +unsupported_function_crash(__fxstat64); +unsupported_function_crash(__lxstat64); +unsupported_function_log(int, socketpair, -1); +unsupported_function_crash(sigsuspend); +unsupported_function_log(int, sigpending, -1); +unsupported_function_log(int, shutdown, -1); +unsupported_function_log(int, setuid, -1); +unsupported_function_log(int, setgid, -1); +unsupported_function_crash(rewinddir); +unsupported_function_log(int, getpriority, -1); +unsupported_function_log(int, setpriority, -1); +unsupported_function_log(int, mkfifo, -1); +unsupported_function_log(int, getitimer, -1); +unsupported_function_log(int, setitimer, -1); +unsupported_function_log(void *, getservbyport, NULL); +unsupported_function_log(void *, getservbyname, NULL); +unsupported_function_log(void *, getpwuid, NULL); +unsupported_function_log(void *, getpwnam, NULL); +unsupported_function_log(void *, getprotobynumber, NULL); +unsupported_function_log(void *, getprotobyname, NULL); +unsupported_function_log(int, getpeername, -1); +unsupported_function_log(int, getnameinfo, -1); +unsupported_function_log(char *, getlogin, NULL); +unsupported_function_crash(__h_errno_location); 
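/*
 * For reference, a stub such as unsupported_function(int, sigemptyset, -1)
 * above expands, via the asm(#function) alias, to roughly:
 *
 *     int __unsup_sigemptyset(void) asm("sigemptyset");
 *     int __unsup_sigemptyset(void)
 *     {
 *         errno = ENOSYS;
 *         return -1;
 *     }
 *
 * so the linker-visible symbol is the plain libc name while the C-level
 * identifier stays clear of any conflicting prototype.
 */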
+unsupported_function_log(int, gethostbyname_r, -1); +unsupported_function_log(int, gethostbyaddr_r, -1); +unsupported_function_log(int, getgroups, -1); +unsupported_function_log(void *, getgrgid, NULL); +unsupported_function_log(void *, getgrnam, NULL); +unsupported_function_log(int, getaddrinfo, -1); +unsupported_function_log(int, freeaddrinfo, -1); +unsupported_function_log(int, ftruncate64, -1); +unsupported_function_log(int, fchown, -1); +unsupported_function_log(int, fchmod, -1); +unsupported_function_crash(execvp); +unsupported_function_log(int, dup, -1) +unsupported_function_log(int, chroot, -1) +unsupported_function_log(int, chown, -1); +unsupported_function_log(int, chmod, -1); +unsupported_function_crash(alarm); +unsupported_function_log(int, inet_pton, -1); +unsupported_function_log(int, access, -1); +#endif diff -Nru xen-4.9.0/extras/mini-os/lib/xmalloc.c xen-4.9.2/extras/mini-os/lib/xmalloc.c --- xen-4.9.0/extras/mini-os/lib/xmalloc.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/xmalloc.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,319 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: xmaloc.c + * Author: Grzegorz Milos (gm281@cam.ac.uk) + * Samuel Thibault (samuel.thibault@eu.citrix.com) + * Changes: + * + * Date: Aug 2005 + * Jan 2008 + * + * Environment: Xen Minimal OS + * Description: simple memory allocator + * + **************************************************************************** + * Simple allocator for Mini-os. If larger than a page, simply use the + * page-order allocator. + * + * Copy of the allocator for Xen by Rusty Russell: + * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_LIBC +/* static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED; */ + +struct xmalloc_hdr +{ + /* Total including this hdr, unused padding and second hdr. */ + size_t size; + MINIOS_TAILQ_ENTRY(struct xmalloc_hdr) freelist; +} __cacheline_aligned; + +static MINIOS_TAILQ_HEAD(,struct xmalloc_hdr) freelist = + MINIOS_TAILQ_HEAD_INITIALIZER(freelist); + +/* Unused padding data between the two hdrs. */ + +struct xmalloc_pad +{ + /* Size including both hdrs. */ + size_t hdr_size; +}; + +/* Return size, increased to alignment with align. 
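 * align must be a power of two for the mask trick to hold; e.g.
 * align_up(13, 8) = (13 + 7) & ~7 = 16, while align_up(16, 8)
 * stays 16.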
*/ +static inline size_t align_up(size_t size, size_t align) +{ + return (size + align - 1) & ~(align - 1); +} + +static void maybe_split(struct xmalloc_hdr *hdr, size_t size, size_t block) +{ + struct xmalloc_hdr *extra; + size_t leftover; + size = align_up(size, __alignof__(struct xmalloc_hdr)); + size = align_up(size, __alignof__(struct xmalloc_pad)); + leftover = block - size; + + /* If enough is left to make a block, put it on free list. */ + if ( leftover >= (2 * (sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad))) ) + { + extra = (struct xmalloc_hdr *)((unsigned long)hdr + size); + extra->size = leftover; + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_INSERT_HEAD(&freelist, extra, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + } + else + { + size = block; + } + + hdr->size = size; +} + +static struct xmalloc_hdr *xmalloc_new_page(size_t size) +{ + struct xmalloc_hdr *hdr; + /* unsigned long flags; */ + + hdr = (struct xmalloc_hdr *)alloc_page(); + if ( hdr == NULL ) + return NULL; + + maybe_split(hdr, size, PAGE_SIZE); + + return hdr; +} + +/* Big object? Just use the page allocator. */ +static void *xmalloc_whole_pages(size_t size, size_t align) +{ + struct xmalloc_hdr *hdr; + struct xmalloc_pad *pad; + unsigned int pageorder; + void *ret; + /* Room for headers */ + size_t hdr_size = sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad); + /* Align for actual beginning of data */ + hdr_size = align_up(hdr_size, align); + + pageorder = get_order(hdr_size + size); + + hdr = (struct xmalloc_hdr *)alloc_pages(pageorder); + if ( hdr == NULL ) + return NULL; + + hdr->size = (1UL << (pageorder + PAGE_SHIFT)); + + ret = (char*)hdr + hdr_size; + pad = (struct xmalloc_pad *) ret - 1; + pad->hdr_size = hdr_size; + return ret; +} + +void *_xmalloc(size_t size, size_t align) +{ + struct xmalloc_hdr *i, *tmp, *hdr = NULL; + uintptr_t data_begin; + size_t hdr_size; + /* unsigned long flags; */ + + hdr_size = sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad); + /* Align on headers requirements. */ + align = align_up(align, __alignof__(struct xmalloc_hdr)); + align = align_up(align, __alignof__(struct xmalloc_pad)); + + /* For big allocs, give them whole pages. */ + if ( size + align_up(hdr_size, align) >= PAGE_SIZE ) + return xmalloc_whole_pages(size, align); + + /* Search free list. */ + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_FOREACH_SAFE(i, &freelist, freelist, tmp) + { + data_begin = align_up((uintptr_t)i + hdr_size, align); + + if ( data_begin + size > (uintptr_t)i + i->size ) + continue; + + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + + uintptr_t size_before = (data_begin - hdr_size) - (uintptr_t)i; + + if (size_before >= 2 * hdr_size) { + /* Worth splitting the beginning */ + struct xmalloc_hdr *new_i = (void*)(data_begin - hdr_size); + new_i->size = i->size - size_before; + i->size = size_before; + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_INSERT_HEAD(&freelist, i, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + i = new_i; + } + maybe_split(i, (data_begin + size) - (uintptr_t)i, i->size); + hdr = i; + break; + } + + if (!hdr) { + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + + /* Alloc a new page and return from that. 
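 * (align_up(hdr_size, align) + size bounds what the aligned data
 * placement can require, so the single fresh page is always large
 * enough here; pad->hdr_size below then records the actual distance
 * from the returned data back to the block header.)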
*/ + hdr = xmalloc_new_page(align_up(hdr_size, align) + size); + if ( hdr == NULL ) + return NULL; + data_begin = (uintptr_t)hdr + align_up(hdr_size, align); + } + + struct xmalloc_pad *pad = (struct xmalloc_pad *) data_begin - 1; + pad->hdr_size = data_begin - (uintptr_t)hdr; + BUG_ON(data_begin % align); + return (void*)data_begin; +} + +void xfree(const void *p) +{ + /* unsigned long flags; */ + struct xmalloc_hdr *i, *tmp, *hdr; + struct xmalloc_pad *pad; + + if ( p == NULL ) + return; + + pad = (struct xmalloc_pad *)p - 1; + hdr = (struct xmalloc_hdr *)((char *)p - pad->hdr_size); + + /* Big allocs free directly. */ + if ( hdr->size >= PAGE_SIZE ) + { + free_pages(hdr, get_order(hdr->size)); + return; + } + + /* We know hdr will be on same page. */ + if(((long)p & PAGE_MASK) != ((long)hdr & PAGE_MASK)) + { + printk("Header should be on the same page\n"); + *(int*)0=0; + } + + /* Merge with other free block, or put in list. */ + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_FOREACH_SAFE(i, &freelist, freelist, tmp) + { + unsigned long _i = (unsigned long)i; + unsigned long _hdr = (unsigned long)hdr; + + /* Do not merge across page boundaries. */ + if ( ((_i ^ _hdr) & PAGE_MASK) != 0 ) + continue; + + /* We follow this block? Swallow it. */ + if ( (_i + i->size) == _hdr ) + { + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + i->size += hdr->size; + hdr = i; + } + + /* We precede this block? Swallow it. */ + if ( (_hdr + hdr->size) == _i ) + { + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + hdr->size += i->size; + } + } + + /* Did we merge an entire page? */ + if ( hdr->size == PAGE_SIZE ) + { + if((((unsigned long)hdr) & (PAGE_SIZE-1)) != 0) + { + printk("Bug\n"); + *(int*)0=0; + } + free_page(hdr); + } + else + { + MINIOS_TAILQ_INSERT_HEAD(&freelist, hdr, freelist); + } + + /* spin_unlock_irqrestore(&freelist_lock, flags); */ +} + +void *malloc(size_t size) +{ + return _xmalloc(size, DEFAULT_ALIGN); +} + +void *realloc(void *ptr, size_t size) +{ + void *new; + struct xmalloc_hdr *hdr; + struct xmalloc_pad *pad; + size_t old_data_size; + + if (ptr == NULL) + return _xmalloc(size, DEFAULT_ALIGN); + + pad = (struct xmalloc_pad *)ptr - 1; + hdr = (struct xmalloc_hdr *)((char*)ptr - pad->hdr_size); + + old_data_size = hdr->size - pad->hdr_size; + if ( old_data_size >= size ) + { + maybe_split(hdr, pad->hdr_size + size, hdr->size); + return ptr; + } + + new = _xmalloc(size, DEFAULT_ALIGN); + if (new == NULL) + return NULL; + + memcpy(new, ptr, old_data_size); + xfree(ptr); + + return new; +} + +void free(void *ptr) +{ + xfree(ptr); +} +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/lib/xs.c xen-4.9.2/extras/mini-os/lib/xs.c --- xen-4.9.0/extras/mini-os/lib/xs.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lib/xs.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,194 @@ +/* + * libxs-compatible layer + * + * Samuel Thibault , 2007-2008 + * + * Mere wrapper around xenbus_* + */ + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include + +static inline int _xs_fileno(struct xs_handle *h) { + return (intptr_t) h; +} + +struct xs_handle *xs_daemon_open() +{ + int fd = alloc_fd(FTYPE_XENBUS); + files[fd].xenbus.events = NULL; + printk("xs_daemon_open -> %d, %p\n", fd, &files[fd].xenbus.events); + return (void*)(intptr_t) fd; +} + +void xs_daemon_close(struct xs_handle *h) +{ + int fd = _xs_fileno(h); + 
struct xenbus_event *event, *next; + for (event = files[fd].xenbus.events; event; event = next) + { + next = event->next; + free(event); + } + files[fd].type = FTYPE_NONE; +} + +int xs_fileno(struct xs_handle *h) +{ + return _xs_fileno(h); +} + +void *xs_read(struct xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + char *value; + char *msg; + + msg = xenbus_read(t, path, &value); + if (msg) { + printk("xs_read(%s): %s\n", path, msg); + free(msg); + return NULL; + } + + if (len) + *len = strlen(value); + return value; +} + +bool xs_write(struct xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len) +{ + char value[len + 1]; + char *msg; + + memcpy(value, data, len); + value[len] = 0; + + msg = xenbus_write(t, path, value); + if (msg) { + printk("xs_write(%s): %s\n", path, msg); + free(msg); + return false; + } + return true; +} + +static bool xs_bool(char *reply) +{ + if (!reply) + return true; + free(reply); + return false; +} + +bool xs_rm(struct xs_handle *h, xs_transaction_t t, const char *path) +{ + return xs_bool(xenbus_rm(t, path)); +} + +static void *xs_talkv(struct xs_handle *h, xs_transaction_t t, + enum xsd_sockmsg_type type, + struct write_req *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg *msg; + void *ret; + + msg = xenbus_msg_reply(type, t, iovec, num_vecs); + ret = malloc(msg->len); + memcpy(ret, (char*) msg + sizeof(*msg), msg->len); + if (len) + *len = msg->len - 1; + free(msg); + return ret; +} + +static void *xs_single(struct xs_handle *h, xs_transaction_t t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct write_req iovec; + + iovec.data = (void *)string; + iovec.len = strlen(string) + 1; + + return xs_talkv(h, t, type, &iovec, 1, len); +} + +char *xs_get_domain_path(struct xs_handle *h, unsigned int domid) +{ + char domid_str[MAX_STRLEN(domid)]; + + sprintf(domid_str, "%u", domid); + + return xs_single(h, XBT_NULL, XS_GET_DOMAIN_PATH, domid_str, NULL); +} + +char **xs_directory(struct xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num) +{ + char *msg; + char **entries, **res; + char *entry; + int i, n; + int size; + + msg = xenbus_ls(t, path, &res); + if (msg) { + printk("xs_directory(%s): %s\n", path, msg); + free(msg); + return NULL; + } + + size = 0; + for (n = 0; res[n]; n++) + size += strlen(res[n]) + 1; + + entries = malloc(n * sizeof(char *) + size); + entry = (char *) (&entries[n]); + + for (i = 0; i < n; i++) { + int l = strlen(res[i]) + 1; + memcpy(entry, res[i], l); + free(res[i]); + entries[i] = entry; + entry += l; + } + + *num = n; + free(res); + return entries; +} + +bool xs_watch(struct xs_handle *h, const char *path, const char *token) +{ + int fd = _xs_fileno(h); + printk("xs_watch(%s, %s)\n", path, token); + return xs_bool(xenbus_watch_path_token(XBT_NULL, path, token, &files[fd].xenbus.events)); +} + +char **xs_read_watch(struct xs_handle *h, unsigned int *num) +{ + int fd = _xs_fileno(h); + struct xenbus_event *event; + event = files[fd].xenbus.events; + files[fd].xenbus.events = event->next; + printk("xs_read_watch() -> %s %s\n", event->path, event->token); + *num = 2; + return (char **) &event->path; +} + +bool xs_unwatch(struct xs_handle *h, const char *path, const char *token) +{ + printk("xs_unwatch(%s, %s)\n", path, token); + return xs_bool(xenbus_unwatch_path_token(XBT_NULL, path, token)); +} +#endif diff -Nru xen-4.9.0/extras/mini-os/lock.c xen-4.9.2/extras/mini-os/lock.c --- 
xen-4.9.0/extras/mini-os/lock.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lock.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,112 @@ +/* + * locks for newlib + * + * Samuel Thibault , July 2008 + */ + +#ifdef HAVE_LIBC + +#include +#include +#include +#include + +int ___lock_init(_LOCK_T *lock) +{ + lock->busy = 0; + init_waitqueue_head(&lock->wait); + return 0; +} + +int ___lock_acquire(_LOCK_T *lock) +{ + unsigned long flags; + while(1) { + wait_event(lock->wait, !lock->busy); + local_irq_save(flags); + if (!lock->busy) + break; + local_irq_restore(flags); + } + lock->busy = 1; + local_irq_restore(flags); + return 0; +} + +int ___lock_try_acquire(_LOCK_T *lock) +{ + unsigned long flags; + int ret = -1; + local_irq_save(flags); + if (!lock->busy) { + lock->busy = 1; + ret = 0; + } + local_irq_restore(flags); + return ret; +} + +int ___lock_release(_LOCK_T *lock) +{ + unsigned long flags; + local_irq_save(flags); + lock->busy = 0; + wake_up(&lock->wait); + local_irq_restore(flags); + return 0; +} + + +int ___lock_init_recursive(_LOCK_RECURSIVE_T *lock) +{ + lock->owner = NULL; + init_waitqueue_head(&lock->wait); + return 0; +} + +int ___lock_acquire_recursive(_LOCK_RECURSIVE_T *lock) +{ + unsigned long flags; + if (lock->owner != get_current()) { + while (1) { + wait_event(lock->wait, lock->owner == NULL); + local_irq_save(flags); + if (lock->owner == NULL) + break; + local_irq_restore(flags); + } + lock->owner = get_current(); + local_irq_restore(flags); + } + lock->count++; + return 0; +} + +int ___lock_try_acquire_recursive(_LOCK_RECURSIVE_T *lock) +{ + unsigned long flags; + int ret = -1; + local_irq_save(flags); + if (!lock->owner) { + ret = 0; + lock->owner = get_current(); + lock->count++; + } + local_irq_restore(flags); + return ret; +} + +int ___lock_release_recursive(_LOCK_RECURSIVE_T *lock) +{ + unsigned long flags; + BUG_ON(lock->owner != get_current()); + if (--lock->count) + return 0; + local_irq_save(flags); + lock->owner = NULL; + wake_up(&lock->wait); + local_irq_restore(flags); + return 0; +} + +#endif diff -Nru xen-4.9.0/extras/mini-os/lwip-arch.c xen-4.9.2/extras/mini-os/lwip-arch.c --- xen-4.9.0/extras/mini-os/lwip-arch.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lwip-arch.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,294 @@ +/* + * lwip-arch.c + * + * Arch-specific semaphores and mailboxes for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#include +#include +#include +#include +#include +#include + +/* Is called to initialize the sys_arch layer */ +void sys_init(void) +{ +} + +/* Creates and returns a new semaphore. The "count" argument specifies + * the initial state of the semaphore. */ +sys_sem_t sys_sem_new(uint8_t count) +{ + struct semaphore *sem = xmalloc(struct semaphore); + sem->count = count; + init_waitqueue_head(&sem->wait); + return sem; +} + +/* Deallocates a semaphore. */ +void sys_sem_free(sys_sem_t sem) +{ + xfree(sem); +} + +/* Signals a semaphore. */ +void sys_sem_signal(sys_sem_t sem) +{ + up(sem); +} + +/* Blocks the thread while waiting for the semaphore to be + * signaled. If the "timeout" argument is non-zero, the thread should + * only be blocked for the specified time (measured in + * milliseconds). + * + * If the timeout argument is non-zero, the return value is the number of + * milliseconds spent waiting for the semaphore to be signaled. If the + * semaphore wasn't signaled within the specified time, the return value is + * SYS_ARCH_TIMEOUT.
If the thread didn't have to wait for the semaphore + * (i.e., it was already signaled), the function may return zero. */ +uint32_t sys_arch_sem_wait(sys_sem_t sem, uint32_t timeout) +{ + /* Slightly more complicated than the normal minios semaphore: + * need to wake on timeout *or* signal */ + sys_prot_t prot; + int64_t then = NOW(); + int64_t deadline; + + if (timeout == 0) + deadline = 0; + else + deadline = then + MILLISECS(timeout); + + while(1) { + wait_event_deadline(sem->wait, (sem->count > 0), deadline); + + prot = sys_arch_protect(); + /* Atomically check that we can proceed */ + if (sem->count > 0 || (deadline && NOW() >= deadline)) + break; + sys_arch_unprotect(prot); + } + + if (sem->count > 0) { + sem->count--; + sys_arch_unprotect(prot); + return NSEC_TO_MSEC(NOW() - then); + } + + sys_arch_unprotect(prot); + return SYS_ARCH_TIMEOUT; +} + +/* Creates an empty mailbox. */ +sys_mbox_t sys_mbox_new(int size) +{ + struct mbox *mbox = xmalloc(struct mbox); + if (!size) + size = 32; + else if (size == 1) + size = 2; + mbox->count = size; + mbox->messages = xmalloc_array(void*, size); + init_SEMAPHORE(&mbox->read_sem, 0); + mbox->reader = 0; + init_SEMAPHORE(&mbox->write_sem, size); + mbox->writer = 0; + return mbox; +} + +/* Deallocates a mailbox. If there are messages still present in the + * mailbox when the mailbox is deallocated, it is an indication of a + * programming error in lwIP and the developer should be notified. */ +void sys_mbox_free(sys_mbox_t mbox) +{ + ASSERT(mbox->reader == mbox->writer); + xfree(mbox->messages); + xfree(mbox); +} + +/* Posts the "msg" to the mailbox, internal version that actually does the + * post. */ +static void do_mbox_post(sys_mbox_t mbox, void *msg) +{ + /* The caller got a semaphore token, so we are now allowed to increment + * writer, but we still need to prevent concurrency between writers + * (interrupt handler vs main) */ + sys_prot_t prot = sys_arch_protect(); + mbox->messages[mbox->writer] = msg; + mbox->writer = (mbox->writer + 1) % mbox->count; + ASSERT(mbox->reader != mbox->writer); + sys_arch_unprotect(prot); + up(&mbox->read_sem); +} + +/* Posts the "msg" to the mailbox. */ +void sys_mbox_post(sys_mbox_t mbox, void *msg) +{ + if (mbox == SYS_MBOX_NULL) + return; + down(&mbox->write_sem); + do_mbox_post(mbox, msg); +} + +/* Try to post the "msg" to the mailbox. */ +err_t sys_mbox_trypost(sys_mbox_t mbox, void *msg) +{ + if (mbox == SYS_MBOX_NULL) + return ERR_BUF; + if (!trydown(&mbox->write_sem)) + return ERR_MEM; + do_mbox_post(mbox, msg); + return ERR_OK; +} + +/* + * Fetch a message from a mailbox. Internal version that actually does the + * fetch. + */ +static void do_mbox_fetch(sys_mbox_t mbox, void **msg) +{ + sys_prot_t prot; + /* The caller got a semaphore token, so we are now allowed to increment + * reader, but we may still need to prevent concurrency between readers. + * FIXME: can there be concurrent readers? */ + prot = sys_arch_protect(); + ASSERT(mbox->reader != mbox->writer); + if (msg != NULL) + *msg = mbox->messages[mbox->reader]; + mbox->reader = (mbox->reader + 1) % mbox->count; + sys_arch_unprotect(prot); + up(&mbox->write_sem); +} + +/* Blocks the thread until a message arrives in the mailbox, but does + * not block the thread longer than "timeout" milliseconds (similar to + * the sys_arch_sem_wait() function). The "msg" argument is a result + * parameter that is set by the function (i.e., by doing "*msg = + * ptr"). The "msg" parameter may be NULL to indicate that the message + * should be dropped.
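Taken together, the post/fetch pairs above give the usual bounded producer/consumer pattern. A minimal sketch, assuming only the sys_* primitives in this file (the thread bodies, names, and payload are illustrative):

    static sys_mbox_t demo_mbox;

    static void demo_producer(void *arg)
    {
        static int value = 42;
        sys_mbox_post(demo_mbox, &value);   /* blocks while the ring is full */
    }

    static void demo_consumer(void *arg)
    {
        void *msg;
        /* Wait up to 1000 ms; SYS_ARCH_TIMEOUT signals expiry. */
        if (sys_arch_mbox_fetch(demo_mbox, &msg, 1000) != SYS_ARCH_TIMEOUT)
            printk("received %d\n", *(int *)msg);
    }

    static void demo_init(void)
    {
        demo_mbox = sys_mbox_new(32);       /* 32-slot ring */
        create_thread("prod", demo_producer, NULL);
        create_thread("cons", demo_consumer, NULL);
    }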
+ * + * The return values are the same as for the sys_arch_sem_wait() function: + * Number of milliseconds spent waiting or SYS_ARCH_TIMEOUT if there was a + * timeout. */ +uint32_t sys_arch_mbox_fetch(sys_mbox_t mbox, void **msg, uint32_t timeout) +{ + uint32_t rv; + if (mbox == SYS_MBOX_NULL) + return SYS_ARCH_TIMEOUT; + + rv = sys_arch_sem_wait(&mbox->read_sem, timeout); + if ( rv == SYS_ARCH_TIMEOUT ) + return rv; + + do_mbox_fetch(mbox, msg); + return 0; +} + +/* This is similar to sys_arch_mbox_fetch, however if a message is not + * present in the mailbox, it immediately returns with the code + * SYS_MBOX_EMPTY. On success 0 is returned. + * + * To allow for efficient implementations, this can be defined as a + * function-like macro in sys_arch.h instead of a normal function. For + * example, a naive implementation could be: + * #define sys_arch_mbox_tryfetch(mbox,msg) \ + * sys_arch_mbox_fetch(mbox,msg,1) + * although this would introduce unnecessary delays. */ + +uint32_t sys_arch_mbox_tryfetch(sys_mbox_t mbox, void **msg) { + if (mbox == SYS_MBOX_NULL) + return SYS_ARCH_TIMEOUT; + + if (!trydown(&mbox->read_sem)) + return SYS_MBOX_EMPTY; + + do_mbox_fetch(mbox, msg); + return 0; +} + + +/* Returns a pointer to the per-thread sys_timeouts structure. In lwIP, + * each thread has a list of timeouts which is represented as a linked + * list of sys_timeout structures. The sys_timeouts structure holds a + * pointer to a linked list of timeouts. This function is called by + * the lwIP timeout scheduler and must not return a NULL value. + * + * In a single threaded sys_arch implementation, this function will + * simply return a pointer to a global sys_timeouts variable stored in + * the sys_arch module. */ +struct sys_timeouts *sys_arch_timeouts(void) +{ + static struct sys_timeouts timeout; + return &timeout; +} + + +/* Starts a new thread with priority "prio" that will begin its execution in the + * function "thread()". The "arg" argument will be passed as an argument to the + * thread() function. The id of the new thread is returned. Both the id and + * the priority are system dependent. */ +static struct thread *lwip_thread; +sys_thread_t sys_thread_new(char *name, void (* thread)(void *arg), void *arg, int stacksize, int prio) +{ + struct thread *t; + if (stacksize > STACK_SIZE) { + printk("Can't start lwIP thread: stack size %d is too large for our %lu\n", + stacksize, (unsigned long) STACK_SIZE); + do_exit(); + } + lwip_thread = t = create_thread(name, thread, arg); + return t; +} + +/* This optional function does a "fast" critical region protection and returns + * the previous protection level. This function is only called during very short + * critical regions. An embedded system which supports ISR-based drivers might + * want to implement this function by disabling interrupts. Task-based systems + * might want to implement this by using a mutex or disabling tasking. This + * function should support recursive calls from the same task or interrupt. In + * other words, sys_arch_protect() could be called while already protected. In + * that case the return value indicates that it is already protected. + * + * sys_arch_protect() is only required if your port is supporting an operating + * system. */ +sys_prot_t sys_arch_protect(void) +{ + unsigned long flags; + local_irq_save(flags); + return flags; +} + +/* This optional function does a "fast" set of critical region protection to the + * value specified by pval. See the documentation for sys_arch_protect() for + * more information.
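Because Mini-OS backs sys_arch_protect() with local_irq_save(), the canonical pattern for touching state shared with an interrupt handler is short. A sketch, where shared_counter is a made-up variable:

    static volatile int shared_counter;         /* hypothetical shared state */

    static void bump_counter(void)
    {
        sys_prot_t prot = sys_arch_protect();   /* disables interrupts */
        shared_counter++;                       /* now safe against ISRs */
        sys_arch_unprotect(prot);               /* restores the saved state */
    }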
This function is only required if your port is supporting + * an operating system. */ +void sys_arch_unprotect(sys_prot_t pval) +{ + local_irq_restore(pval); +} + +/* non-fatal, print a message. */ +void lwip_printk(char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + printk("lwIP: "); + print(0, fmt, args); + va_end(args); +} + +/* fatal, print message and abandon execution. */ +void lwip_die(char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + printk("lwIP assertion failed: "); + print(0, fmt, args); + va_end(args); + printk("\n"); + BUG(); +} diff -Nru xen-4.9.0/extras/mini-os/lwip-net.c xen-4.9.2/extras/mini-os/lwip-net.c --- xen-4.9.0/extras/mini-os/lwip-net.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/lwip-net.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,386 @@ +/* + * lwip-net.c + * + * interface between lwIP's ethernet and Mini-os's netfront. + * For now, support only one network interface, as mini-os does. + * + * Tim Deegan , July 2007 + * based on lwIP's ethernetif.c skeleton file, copyrights as below. + */ + + +/* + * Copyright (c) 2001-2004 Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + * This file is part of the lwIP TCP/IP stack. + * + * Author: Adam Dunkels + * + */ + +#include + +#include "lwip/opt.h" +#include "lwip/def.h" +#include "lwip/mem.h" +#include "lwip/pbuf.h" +#include "lwip/sys.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netif/etharp.h" + +#include + +/* Define those to better describe your network interface. */ +#define IFNAME0 'e' +#define IFNAME1 'n' + +#define IF_IPADDR 0x00000000 +#define IF_NETMASK 0x00000000 + +/* Only have one network interface at a time. */ +static struct netif *the_interface = NULL; + +static unsigned char rawmac[6]; +static struct netfront_dev *dev; + +/* Forward declarations. */ +static err_t netfront_output(struct netif *netif, struct pbuf *p, + struct ip_addr *ipaddr); + +/* + * low_level_output(): + * + * Should do the actual transmission of the packet. 
The packet is + * contained in the pbuf that is passed to the function. This pbuf + * might be chained. + * + */ + +static err_t +low_level_output(struct netif *netif, struct pbuf *p) +{ + if (!dev) + return ERR_OK; + +#ifdef ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + + /* Send the data from the pbuf to the interface, one pbuf at a + time. The size of the data in each pbuf is kept in the ->len + variable. */ + if (!p->next) { + /* Only one fragment, can send it directly */ + netfront_xmit(dev, p->payload, p->len); + } else { + unsigned char data[p->tot_len], *cur; + struct pbuf *q; + + for(q = p, cur = data; q != NULL; cur += q->len, q = q->next) + memcpy(cur, q->payload, q->len); + netfront_xmit(dev, data, p->tot_len); + } + +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + + LINK_STATS_INC(link.xmit); + + return ERR_OK; +} + + + +/* + * netfront_output(): + * + * This function is called by the TCP/IP stack when an IP packet + * should be sent. It calls the function called low_level_output() to + * do the actual transmission of the packet. + * + */ + +static err_t +netfront_output(struct netif *netif, struct pbuf *p, + struct ip_addr *ipaddr) +{ + + /* resolve hardware address, then send (or queue) packet */ + return etharp_output(netif, p, ipaddr); + +} + +/* + * netfront_input(): + * + * This function should be called when a packet is ready to be read + * from the interface. + * + */ + +static void +netfront_input(struct netif *netif, unsigned char* data, int len) +{ + struct eth_hdr *ethhdr; + struct pbuf *p, *q; + +#if ETH_PAD_SIZE + len += ETH_PAD_SIZE; /* allow room for Ethernet padding */ +#endif + + /* move received packet into a new pbuf */ + p = pbuf_alloc(PBUF_RAW, len, PBUF_POOL); + if (p == NULL) { + LINK_STATS_INC(link.memerr); + LINK_STATS_INC(link.drop); + return; + } + +#if ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + + /* We iterate over the pbuf chain until we have read the entire + * packet into the pbuf. */ + for(q = p; q != NULL && len > 0; q = q->next) { + /* Read enough bytes to fill this pbuf in the chain. The + * available data in the pbuf is given by the q->len + * variable. */ + memcpy(q->payload, data, len < q->len ? len : q->len); + data += q->len; + len -= q->len; + } + +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + + LINK_STATS_INC(link.recv); + + /* points to packet payload, which starts with an Ethernet header */ + ethhdr = p->payload; + + switch (htons(ethhdr->type)) { + /* IP packet? */ + case ETHTYPE_IP: +#if 0 +/* CSi disabled ARP table update on ingress IP packets. + This seems to work but needs thorough testing. */ + /* update ARP table */ + etharp_ip_input(netif, p); +#endif + /* skip Ethernet header */ + pbuf_header(p, -(int16_t)sizeof(struct eth_hdr)); + /* pass to network layer */ + if (tcpip_input(p, netif) == ERR_MEM) + /* Could not store it, drop */ + pbuf_free(p); + break; + + case ETHTYPE_ARP: + /* pass p to ARP module */ + etharp_arp_input(netif, (struct eth_addr *) netif->hwaddr, p); + break; + + default: + pbuf_free(p); + p = NULL; + break; + } +} + + +/* + * netif_rx(): overrides the default netif_rx behaviour in the netfront driver. + * + * Pull received packets into a pbuf queue for the low_level_input() + * function to pass up to lwIP. 
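The driver relies on the pbuf chain invariant that p->tot_len equals the sum of the ->len fields along the ->next chain. The same flattening idiom used by low_level_output(), shown in isolation (a sketch; the output buffer is assumed to be at least p->tot_len bytes):

    /* Copy a possibly-chained pbuf into one flat buffer. */
    static void flatten_pbuf(struct pbuf *p, unsigned char *out)
    {
        unsigned char *cur = out;
        struct pbuf *q;

        for (q = p; q != NULL; q = q->next) {
            memcpy(cur, q->payload, q->len);
            cur += q->len;
        }
        /* cur - out == p->tot_len for a well-formed chain */
    }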
+ */ + +void netif_rx(unsigned char* data, int len) +{ + if (the_interface != NULL) { + netfront_input(the_interface, data, len); + wake_up(&netfront_queue); + } + /* By returning, we ack the packet and relinquish the RX ring slot */ +} + +/* + * Set the IP, mask and gateway of the IF + */ +void networking_set_addr(struct ip_addr *ipaddr, struct ip_addr *netmask, struct ip_addr *gw) +{ + netif_set_ipaddr(the_interface, ipaddr); + netif_set_netmask(the_interface, netmask); + netif_set_gw(the_interface, gw); +} + + +static void +arp_timer(void *arg) +{ + etharp_tmr(); + sys_timeout(ARP_TMR_INTERVAL, arp_timer, NULL); +} + +/* + * netif_netfront_init(): + * + * Should be called at the beginning of the program to set up the + * network interface. It calls the function low_level_init() to do the + * actual setup of the hardware. + * + */ + +err_t +netif_netfront_init(struct netif *netif) +{ + unsigned char *mac = netif->state; + +#if LWIP_SNMP + /* ifType ethernetCsmacd(6) @see RFC1213 */ + netif->link_type = 6; + /* your link speed here */ + netif->link_speed = ; + netif->ts = 0; + netif->ifinoctets = 0; + netif->ifinucastpkts = 0; + netif->ifinnucastpkts = 0; + netif->ifindiscards = 0; + netif->ifoutoctets = 0; + netif->ifoutucastpkts = 0; + netif->ifoutnucastpkts = 0; + netif->ifoutdiscards = 0; +#endif + + netif->name[0] = IFNAME0; + netif->name[1] = IFNAME1; + netif->output = netfront_output; + netif->linkoutput = low_level_output; + + the_interface = netif; + + /* set MAC hardware address */ + netif->hwaddr_len = 6; + netif->hwaddr[0] = mac[0]; + netif->hwaddr[1] = mac[1]; + netif->hwaddr[2] = mac[2]; + netif->hwaddr[3] = mac[3]; + netif->hwaddr[4] = mac[4]; + netif->hwaddr[5] = mac[5]; + + /* No interesting per-interface state */ + netif->state = NULL; + + /* maximum transfer unit */ + netif->mtu = 1500; + + /* broadcast capability */ + netif->flags = NETIF_FLAG_BROADCAST; + + etharp_init(); + + sys_timeout(ARP_TMR_INTERVAL, arp_timer, NULL); + + return ERR_OK; +} + +/* + * Thread run by netfront: bring up the IP address and fire lwIP timers. + */ +static __DECLARE_SEMAPHORE_GENERIC(tcpip_is_up, 0); +static void tcpip_bringup_finished(void *p) +{ + tprintk("TCP/IP bringup ends.\n"); + up(&tcpip_is_up); +} + +/* + * Utility function to bring the whole lot up. Call this from app_main() + * or similar -- it starts netfront and has lwIP start its thread, + * which calls back to tcpip_bringup_finished(), which + * lets us know it's OK to continue.
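From the application side the whole sequence collapses to one call. A sketch of a guest network thread under these assumptions (it presumes the lwIP sockets API, LWIP_SOCKET, is compiled in; the thread body is illustrative):

    #include <lwip/sockets.h>

    static void net_app(void *unused)
    {
        int s;

        start_networking();                  /* returns once TCP/IP is up */
        s = lwip_socket(AF_INET, SOCK_STREAM, 0);
        /* ... lwip_connect()/lwip_send()/lwip_recv() as usual ... */
        lwip_close(s);
        stop_networking();
    }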
+ */ +void start_networking(void) +{ + struct netif *netif; + struct ip_addr ipaddr = { htonl(IF_IPADDR) }; + struct ip_addr netmask = { htonl(IF_NETMASK) }; + struct ip_addr gw = { 0 }; + char *ip = NULL; + + tprintk("Waiting for network.\n"); + + dev = init_netfront(NULL, NULL, rawmac, &ip); + + if (ip) { + ipaddr.addr = inet_addr(ip); + if (IN_CLASSA(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSB_NET); + else if (IN_CLASSC(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSC_NET); + else + tprintk("Strange IP %s, leaving netmask to 0.\n", ip); + } + tprintk("IP %x netmask %x gateway %x.\n", + ntohl(ipaddr.addr), ntohl(netmask.addr), ntohl(gw.addr)); + + tprintk("TCP/IP bringup begins.\n"); + + netif = xmalloc(struct netif); + tcpip_init(tcpip_bringup_finished, netif); + + netif_add(netif, &ipaddr, &netmask, &gw, rawmac, + netif_netfront_init, ip_input); + netif_set_default(netif); + netif_set_up(netif); + + down(&tcpip_is_up); + + tprintk("Network is ready.\n"); +} + +/* Shut down the network */ +void stop_networking(void) +{ + if (dev) + shutdown_netfront(dev); +} diff -Nru xen-4.9.0/extras/mini-os/main.c xen-4.9.2/extras/mini-os/main.c --- xen-4.9.0/extras/mini-os/main.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/main.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,194 @@ +/* + * POSIX-compatible main layer + * + * Samuel Thibault , October 2007 + */ + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int main(int argc, char *argv[], char *envp[]); +extern void __libc_init_array(void); +extern void __libc_fini_array(void); +extern unsigned long __CTOR_LIST__[]; +extern unsigned long __DTOR_LIST__[]; + +#if 0 +#include +int main(int argc, char *argv[], char *envp[]) +{ + printf("Hello, World!\n"); + return 1; +} +#endif + +void _init(void) +{ +} + +void _fini(void) +{ +} + +extern char __app_bss_start, __app_bss_end; +static void call_main(void *p) +{ + char *c, quote; +#ifdef CONFIG_QEMU_XS_ARGS + char *domargs, *msg; +#endif + int argc; + char **argv; + char *envp[] = { NULL }; +#ifdef CONFIG_QEMU_XS_ARGS + char *vm; + char path[128]; + int domid; +#endif + int i; + + /* Let other parts initialize (including console output) before maybe + * crashing. 
*/ + //sleep(1); + +#ifdef CONFIG_SPARSE_BSS + sparse((unsigned long) &__app_bss_start, &__app_bss_end - &__app_bss_start); +#endif +#if defined(HAVE_LWIP) && defined(CONFIG_START_NETWORK) && defined(CONFIG_NETFRONT) + start_networking(); +#endif +#ifdef CONFIG_PCIFRONT + create_thread("pcifront", pcifront_watches, NULL); +#endif + +#ifdef CONFIG_QEMU_XS_ARGS + /* Fetch argc, argv from XenStore */ + domid = xenbus_read_integer("target"); + if (domid == -1) { + printk("Couldn't read target\n"); + do_exit(); + } + + snprintf(path, sizeof(path), "/local/domain/%d/vm", domid); + msg = xenbus_read(XBT_NIL, path, &vm); + if (msg) { + printk("Couldn't read vm path\n"); + do_exit(); + } + printk("dom vm is at %s\n", vm); + + snprintf(path, sizeof(path), "%s/image/dmargs", vm); + free(vm); + msg = xenbus_read(XBT_NIL, path, &domargs); + + if (msg) { + printk("Couldn't get stubdom args: %s\n", msg); + domargs = strdup(""); + } +#endif + + argc = 1; + +#define PARSE_ARGS(ARGS,START,QUOTE,END) \ + c = ARGS; \ + quote = 0; \ + while (*c) { \ + if (*c != ' ') { \ + START; \ + while (*c) { \ + if (quote) { \ + if (*c == quote) { \ + quote = 0; \ + QUOTE; \ + continue; \ + } \ + } else if (*c == ' ') \ + break; \ + if (*c == '"' || *c == '\'') { \ + quote = *c; \ + QUOTE; \ + continue; \ + } \ + c++; \ + } \ + } else { \ + END; \ + while (*c == ' ') \ + c++; \ + } \ + } \ + if (quote) {\ + printk("Warning: unterminated quotation %c\n", quote); \ + quote = 0; \ + } +#define PARSE_ARGS_COUNT(ARGS) PARSE_ARGS(ARGS, argc++, c++, ) +#define PARSE_ARGS_STORE(ARGS) PARSE_ARGS(ARGS, argv[argc++] = c, memmove(c, c + 1, strlen(c + 1) + 1), *c++ = 0) + + PARSE_ARGS_COUNT(cmdline); +#ifdef CONFIG_QEMU_XS_ARGS + PARSE_ARGS_COUNT(domargs); +#endif + + argv = alloca((argc + 1) * sizeof(char *)); + argv[0] = "main"; + argc = 1; + + PARSE_ARGS_STORE(cmdline) +#ifdef CONFIG_QEMU_XS_ARGS + PARSE_ARGS_STORE(domargs) +#endif + + argv[argc] = NULL; + + for (i = 0; i < argc; i++) + printf("\"%s\" ", argv[i]); + printf("\n"); + + __libc_init_array(); + environ = envp; + for (i = 0; __CTOR_LIST__[i] != 0; i++) + ((void((*)(void)))__CTOR_LIST__[i]) (); + tzset(); + + exit(main(argc, argv, envp)); +} + +void _exit(int ret) +{ + int i; + + for (i = 0; __DTOR_LIST__[i] != 0; i++) + ((void((*)(void)))__DTOR_LIST__[i]) (); + close_all_files(); + __libc_fini_array(); + printk("main returned %d\n", ret); +#if defined(HAVE_LWIP) && defined(CONFIG_NETFRONT) + stop_networking(); +#endif + stop_kernel(); + if (!ret) { + /* No problem, just shutdown. */ + struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_poweroff }; + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + } + do_exit(); +} + +int app_main(void *p) +{ + printk("main.c: dummy main: par=%p\n", p); + main_thread = create_thread("main", call_main, p); + return 0; +} +#endif diff -Nru xen-4.9.0/extras/mini-os/Makefile xen-4.9.2/extras/mini-os/Makefile --- xen-4.9.0/extras/mini-os/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/Makefile 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,220 @@ +# Common Makefile for mini-os. +# +# Every architecture directory below mini-os/arch has to have a +# Makefile and an arch.mk. +# + +OBJ_DIR=$(CURDIR) +TOPLEVEL_DIR=$(CURDIR) + +include Config.mk + +# Symlinks and headers that must be created before building the C files +GENERATED_HEADERS := include/list.h $(ARCH_LINKS) include/mini-os include/$(TARGET_ARCH_FAM)/mini-os + +EXTRA_DEPS += $(GENERATED_HEADERS) + +# Include common mini-os makerules.
+include minios.mk + +# Set tester flags +# CFLAGS += -DBLKTEST_WRITE + +# Define some default flags for linking. +LDLIBS := +APP_LDLIBS := +LDARCHLIB := -L$(OBJ_DIR)/$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME) +LDFLAGS_FINAL := -T $(OBJ_DIR)/$(TARGET_ARCH_DIR)/minios-$(MINIOS_TARGET_ARCH).lds $(ARCH_LDFLAGS_FINAL) + +# Prefix for global API names. All other symbols are localised before +# linking with EXTRA_OBJS. +GLOBAL_PREFIX := xenos_ +EXTRA_OBJS = + +TARGET := mini-os + +# Subdirectories common to mini-os +SUBDIRS := lib xenbus console + +src-$(CONFIG_BLKFRONT) += blkfront.c +src-$(CONFIG_TPMFRONT) += tpmfront.c +src-$(CONFIG_TPM_TIS) += tpm_tis.c +src-$(CONFIG_TPMBACK) += tpmback.c +src-y += daytime.c +src-y += events.c +src-$(CONFIG_FBFRONT) += fbfront.c +src-y += gntmap.c +src-y += gnttab.c +src-y += hypervisor.c +src-y += kernel.c +src-y += lock.c +src-y += main.c +src-y += mm.c +src-$(CONFIG_NETFRONT) += netfront.c +src-$(CONFIG_PCIFRONT) += pcifront.c +src-y += sched.c +src-$(CONFIG_TEST) += test.c +src-$(CONFIG_BALLOON) += balloon.c + +src-y += lib/ctype.c +src-y += lib/math.c +src-y += lib/printf.c +src-y += lib/stack_chk_fail.c +src-y += lib/string.c +src-y += lib/sys.c +src-y += lib/xmalloc.c +src-$(CONFIG_XENBUS) += lib/xs.c + +src-$(CONFIG_XENBUS) += xenbus/xenbus.c + +src-y += console/console.c +src-y += console/xencons_ring.c +src-$(CONFIG_CONSFRONT) += console/xenbus.c + +# The common mini-os objects to build. +APP_OBJS := +OBJS := $(patsubst %.c,$(OBJ_DIR)/%.o,$(src-y)) + +.PHONY: default +default: $(OBJ_DIR)/$(TARGET) + +# Create special architecture specific links. The function arch_links +# has to be defined in arch.mk (see include above). +ifneq ($(ARCH_LINKS),) +$(ARCH_LINKS): + $(arch_links) +endif + +include/list.h: include/minios-external/bsd-sys-queue-h-seddery include/minios-external/bsd-sys-queue.h + perl $^ --prefix=minios >$@.new + $(call move-if-changed,$@.new,$@) + +# Used by stubdom's Makefile +.PHONY: links +links: $(GENERATED_HEADERS) + +include/mini-os: + ln -sf . $@ + +include/$(TARGET_ARCH_FAM)/mini-os: + ln -sf . 
$@ + +.PHONY: arch_lib +arch_lib: + $(MAKE) --directory=$(TARGET_ARCH_DIR) OBJ_DIR=$(OBJ_DIR)/$(TARGET_ARCH_DIR) || exit 1; + +ifeq ($(CONFIG_LWIP),y) +# lwIP library +LWC := $(sort $(shell find $(LWIPDIR)/src -type f -name '*.c')) +LWC := $(filter-out %6.c %ip6_addr.c %ethernetif.c, $(LWC)) +LWO := $(patsubst %.c,%.o,$(LWC)) +LWO += $(OBJ_DIR)/lwip-arch.o +ifeq ($(CONFIG_NETFRONT),y) +LWO += $(OBJ_DIR)/lwip-net.o +endif + +$(OBJ_DIR)/lwip.a: $(LWO) + $(RM) $@ + $(AR) cqs $@ $^ + +OBJS += $(OBJ_DIR)/lwip.a +endif + +OBJS := $(filter-out $(OBJ_DIR)/lwip%.o $(LWO), $(OBJS)) + +ifeq ($(libc),y) +ifeq ($(CONFIG_XC),y) +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/toollog -whole-archive -lxentoollog -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/toollog/libxentoollog.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/evtchn -whole-archive -lxenevtchn -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/evtchn/libxenevtchn.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/gnttab -whole-archive -lxengnttab -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/gnttab/libxengnttab.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call -whole-archive -lxencall -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call/libxencall.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory -whole-archive -lxenforeignmemory -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory/libxenforeignmemory.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel -whole-archive -lxendevicemodel -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel/libxendevicemodel.a +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH) -whole-archive -lxenguest -lxenctrl -no-whole-archive +LIBS += $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)/libxenctrl.a +LIBS += $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)/libxenguest.a +endif +APP_LDLIBS += -lpci +APP_LDLIBS += -lz +APP_LDLIBS += -lm +LDLIBS += -lc +endif + +ifneq ($(APP_OBJS)-$(lwip),-y) +OBJS := $(filter-out $(OBJ_DIR)/daytime.o, $(OBJS)) +endif + +$(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds $(LIBS) + $(LD) -r -d $(LDFLAGS) -\( $(APP_OBJS) -T app.lds -\) $(APP_LDLIBS) --undefined main -o $@ + +ifneq ($(APP_OBJS),) +APP_O=$(OBJ_DIR)/$(TARGET)_app.o +endif + +# Special rule for x86 for now +$(OBJ_DIR)/arch/x86/minios-x86%.lds: arch/x86/minios-x86.lds.S + $(CPP) $(ASFLAGS) -P $< -o $@ + +$(OBJ_DIR)/$(TARGET): $(OBJS) $(APP_O) arch_lib $(OBJ_DIR)/$(TARGET_ARCH_DIR)/minios-$(MINIOS_TARGET_ARCH).lds + $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o + $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o + $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ + gzip -f -9 -c $@ >$@.gz + +.PHONY: config +CONFIG_FILE ?= $(CURDIR)/minios-config.mk +config: + echo "$(DEFINES-y)" >$(CONFIG_FILE) + +.PHONY: clean arch_clean + +arch_clean: + $(MAKE) --directory=$(TARGET_ARCH_DIR) OBJ_DIR=$(OBJ_DIR)/$(TARGET_ARCH_DIR) clean || exit 1; + +clean: arch_clean + for dir in $(addprefix $(OBJ_DIR)/,$(SUBDIRS)); do \ + rm -f $$dir/*.o; \ + done + rm -f include/list.h + rm -f $(OBJ_DIR)/*.o *~ $(OBJ_DIR)/core $(OBJ_DIR)/$(TARGET).elf $(OBJ_DIR)/$(TARGET).raw $(OBJ_DIR)/$(TARGET) $(OBJ_DIR)/$(TARGET).gz + find . 
$(OBJ_DIR) -type l | xargs rm -f + $(RM) $(OBJ_DIR)/lwip.a $(LWO) + rm -f tags TAGS + +.PHONY: testbuild +TEST_CONFIGS := $(wildcard $(CURDIR)/$(TARGET_ARCH_DIR)/testbuild/*) +testbuild: + for arch in $(MINIOS_TARGET_ARCHS); do \ + for conf in $(TEST_CONFIGS); do \ + $(MAKE) clean; \ + MINIOS_TARGET_ARCH=$$arch MINIOS_CONFIG=$$conf $(MAKE) || exit 1; \ + done; \ + done + $(MAKE) clean + +define all_sources + ( find . -name '*.[chS]' -print ) +endef + +.PHONY: cscope +cscope: + $(all_sources) > cscope.files + cscope -k -b -q + +.PHONY: tags +tags: + $(all_sources) | xargs ctags + +.PHONY: TAGS +TAGS: + $(all_sources) | xargs etags + +.PHONY: gtags +gtags: + $(all_sources) | gtags -f - diff -Nru xen-4.9.0/extras/mini-os/minios.mk xen-4.9.2/extras/mini-os/minios.mk --- xen-4.9.0/extras/mini-os/minios.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/minios.mk 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,75 @@ +# +# This file contains the common make rules for building mini-os. +# + +debug = y + +# Define some default flags. +# NB. '-Wcast-qual' is nasty, so I omitted it. +DEF_CFLAGS += -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format -Wno-redundant-decls -Wformat +DEF_CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +DEF_CFLAGS += $(call cc-option,$(CC),-fgnu89-inline) +DEF_CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline + +DEF_ASFLAGS += -D__ASSEMBLY__ +DEF_LDFLAGS += + +ifeq ($(debug),y) +DEF_CFLAGS += -g +#DEF_CFLAGS += -DMM_DEBUG +#DEF_CFLAGS += -DFS_DEBUG +#DEF_CFLAGS += -DLIBC_DEBUG +#DEF_CFLAGS += -DGNT_DEBUG +#DEF_CFLAGS += -DGNTMAP_DEBUG +else +DEF_CFLAGS += -O3 +endif + +# Make the headers define our internal stuff +DEF_CFLAGS += -D__INSIDE_MINIOS__ + +# Build the CFLAGS and ASFLAGS for compiling and assembling. +# DEF_... flags are the common mini-os flags, +# ARCH_... flags may be defined in arch/$(TARGET_ARCH_FAM)/rules.mk +CFLAGS := $(DEF_CFLAGS) $(ARCH_CFLAGS) $(DEFINES-y) +CPPFLAGS := $(DEF_CPPFLAGS) $(ARCH_CPPFLAGS) +ASFLAGS := $(DEF_ASFLAGS) $(ARCH_ASFLAGS) $(DEFINES-y) +LDFLAGS := $(DEF_LDFLAGS) $(ARCH_LDFLAGS) + +# Special build dependencies. +# Rebuild all after touching this/these file(s) +EXTRA_DEPS += $(MINIOS_ROOT)/minios.mk +EXTRA_DEPS += $(MINIOS_ROOT)/$(TARGET_ARCH_DIR)/arch.mk + +# Find all header files for checking dependencies. +HDRS := $(wildcard $(MINIOS_ROOT)/include/*.h) +HDRS += $(wildcard $(MINIOS_ROOT)/include/xen/*.h) +HDRS += $(wildcard $(ARCH_INC)/*.h) +# For special wanted header directories. +extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h)) +HDRS += $(extra_heads) + +# Add the special header directories to the include paths. +override CPPFLAGS := $(CPPFLAGS) $(extra_incl) + +# The name of the architecture specific library. +# This is on x86_32: libx86_32.a +# $(ARCH_LIB) has to be built in the architecture specific directory. +ARCH_LIB_NAME = $(MINIOS_TARGET_ARCH) +ARCH_LIB := lib$(ARCH_LIB_NAME).a + +# This object contains the entrypoint for startup from Xen. +# $(HEAD_ARCH_OBJ) has to be built in the architecture specific directory.
+HEAD_ARCH_OBJ := $(MINIOS_TARGET_ARCH).o +HEAD_OBJ := $(OBJ_DIR)/$(TARGET_ARCH_DIR)/$(HEAD_ARCH_OBJ) + + +$(OBJ_DIR)/%.o: %.c $(HDRS) Makefile $(EXTRA_DEPS) + $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ + +$(OBJ_DIR)/%.o: %.S $(HDRS) Makefile $(EXTRA_DEPS) $(ARCH_AS_DEPS) + $(CC) $(ASFLAGS) $(CPPFLAGS) -c $< -o $@ + + + + diff -Nru xen-4.9.0/extras/mini-os/mm.c xen-4.9.2/extras/mini-os/mm.c --- xen-4.9.0/extras/mini-os/mm.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/mm.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,435 @@ +/* + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: mm.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos + * + * Date: Aug 2003, changes Aug 2005 + * + * Environment: Xen Minimal OS + * Description: memory management related functions + * contains buddy page allocator from Xen. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/********************* + * ALLOCATION BITMAP + * One bit per page of memory. Bit set => page is allocated. + */ + +unsigned long *mm_alloc_bitmap; +unsigned long mm_alloc_bitmap_size; + +#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8) + +#define allocated_in_map(_pn) \ + (mm_alloc_bitmap[(_pn) / PAGES_PER_MAPWORD] & \ + (1UL << ((_pn) & (PAGES_PER_MAPWORD - 1)))) + +unsigned long nr_free_pages; + +/* + * Hint regarding bitwise arithmetic in map_{alloc,free}: + * -(1UL<<n) sets all bits >= n. + * (1UL<<n)-1 sets all bits < n. + */ + +static void map_alloc(unsigned long first_page, unsigned long nr_pages) +{ + unsigned long start_off, end_off, curr_idx, end_idx; + + curr_idx = first_page / PAGES_PER_MAPWORD; + start_off = first_page & (PAGES_PER_MAPWORD-1); + end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD; + end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1); + + if ( curr_idx == end_idx ) + { + mm_alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off); + } + else + { + mm_alloc_bitmap[curr_idx] |= -(1UL<<start_off); + while ( ++curr_idx < end_idx ) mm_alloc_bitmap[curr_idx] = ~0UL; + mm_alloc_bitmap[curr_idx] |= (1UL<<end_off)-1; + } + + nr_free_pages -= nr_pages; +} + +static void map_free(unsigned long first_page, unsigned long nr_pages) +{ + unsigned long start_off, end_off, curr_idx, end_idx; + + curr_idx = first_page / PAGES_PER_MAPWORD; + start_off = first_page & (PAGES_PER_MAPWORD-1); + end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD; + end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1); + + nr_free_pages += nr_pages; + + if ( curr_idx == end_idx ) + { + mm_alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1); + } + else + { + mm_alloc_bitmap[curr_idx] &= (1UL<<start_off)-1; + while ( ++curr_idx != end_idx ) mm_alloc_bitmap[curr_idx] = 0; + mm_alloc_bitmap[curr_idx] &= -(1UL<<end_off); + } +} + +/************************* + * BINARY BUDDY ALLOCATOR + */ + +typedef struct chunk_head_st chunk_head_t; +typedef struct chunk_tail_st chunk_tail_t; + +struct chunk_head_st { + chunk_head_t *next; + chunk_head_t **pprev; + int level; +}; + +struct chunk_tail_st { + int level; +}; + +/* Linked lists of free chunks of different powers-of-two in size. */ +#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT) +static chunk_head_t *free_head[FREELIST_SIZE]; +static chunk_head_t free_tail[FREELIST_SIZE]; +#define FREELIST_EMPTY(_l) ((_l)->next == NULL) + +/* + * Initialise allocator, placing addresses [@min,@max] in free pool. + * @min and @max are PHYSICAL addresses.
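The two mask identities in the hint comment above are what let map_alloc() and map_free() mark a whole run of pages with at most three word operations. A tiny worked example, assuming 64-bit map words as in the code above (the offsets are made up):

    /* Mark pages [3,9) allocated within a single bitmap word. */
    unsigned long word = 0;
    unsigned long start_off = 3, end_off = 9;

    /* -(1UL<<start_off) sets bits >= start_off;
     * (1UL<<end_off)-1 sets bits < end_off. */
    word |= ((1UL << end_off) - 1) & -(1UL << start_off);
    /* word == 0x1f8: bits 3..8 inclusive are now set. */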
+ */ +static void init_page_allocator(unsigned long min, unsigned long max) +{ + int i, m; + unsigned long range; + unsigned long r_min, r_max; + chunk_head_t *ch; + chunk_tail_t *ct; + + printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n", + (u_long)to_virt(min), min, (u_long)to_virt(max), max); + for ( i = 0; i < FREELIST_SIZE; i++ ) + { + free_head[i] = &free_tail[i]; + free_tail[i].pprev = &free_head[i]; + free_tail[i].next = NULL; + } + + min = round_pgup (min); + max = round_pgdown(max); + + /* Allocate space for the allocation bitmap. */ + mm_alloc_bitmap_size = (max + 1) >> (PAGE_SHIFT + 3); + mm_alloc_bitmap_size = round_pgup(mm_alloc_bitmap_size); + mm_alloc_bitmap = (unsigned long *)to_virt(min); + min += mm_alloc_bitmap_size; + + /* All allocated by default. */ + memset(mm_alloc_bitmap, ~0, mm_alloc_bitmap_size); + + for ( m = 0; m < e820_entries; m++ ) + { + if ( e820_map[m].type != E820_RAM ) + continue; + if ( e820_map[m].addr + e820_map[m].size >= ULONG_MAX ) + BUG(); + + r_min = e820_map[m].addr; + r_max = r_min + e820_map[m].size; + if ( r_max <= min || r_min >= max ) + continue; + if ( r_min < min ) + r_min = min; + if ( r_max > max ) + r_max = max; + + printk(" Adding memory range %lx-%lx\n", r_min, r_max); + + /* The buddy lists are addressed in high memory. */ + r_min = (unsigned long)to_virt(r_min); + r_max = (unsigned long)to_virt(r_max); + range = r_max - r_min; + + /* Free up the memory we've been given to play with. */ + map_free(PHYS_PFN(r_min), range >> PAGE_SHIFT); + + while ( range != 0 ) + { + /* + * Next chunk is limited by alignment of min, but also + * must not be bigger than remaining range. + */ + for ( i = PAGE_SHIFT; (1UL << (i + 1)) <= range; i++ ) + if ( r_min & (1UL << i) ) break; + + ch = (chunk_head_t *)r_min; + r_min += 1UL << i; + range -= 1UL << i; + ct = (chunk_tail_t *)r_min - 1; + i -= PAGE_SHIFT; + ch->level = i; + ch->next = free_head[i]; + ch->pprev = &free_head[i]; + ch->next->pprev = &ch->next; + free_head[i] = ch; + ct->level = i; + } + } + + mm_alloc_bitmap_remap(); +} + + +/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */ +unsigned long alloc_pages(int order) +{ + int i; + chunk_head_t *alloc_ch, *spare_ch; + chunk_tail_t *spare_ct; + + if ( !chk_free_pages(1UL << order) ) + goto no_memory; + + /* Find smallest order which can satisfy the request. */ + for ( i = order; i < FREELIST_SIZE; i++ ) { + if ( !FREELIST_EMPTY(free_head[i]) ) + break; + } + + if ( i == FREELIST_SIZE ) goto no_memory; + + /* Unlink a chunk. */ + alloc_ch = free_head[i]; + free_head[i] = alloc_ch->next; + alloc_ch->next->pprev = alloc_ch->pprev; + + /* We may have to break the chunk a number of times. */ + while ( i != order ) + { + /* Split into two equal parts. */ + i--; + spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT))); + spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1; + + /* Create new header for spare chunk. */ + spare_ch->level = i; + spare_ch->next = free_head[i]; + spare_ch->pprev = &free_head[i]; + spare_ct->level = i; + + /* Link in the spare chunk. 
*/ + spare_ch->next->pprev = &spare_ch->next; + free_head[i] = spare_ch; + } + + map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order); + + return((unsigned long)alloc_ch); + + no_memory: + + printk("Cannot handle page request order %d!\n", order); + + return 0; +} + +/* Free 2^@order pages. */ +void free_pages(void *pointer, int order) +{ + chunk_head_t *freed_ch, *to_merge_ch; + chunk_tail_t *freed_ct; + unsigned long mask; + + /* First free the chunk */ + map_free(virt_to_pfn(pointer), 1UL << order); + + /* Create free chunk */ + freed_ch = (chunk_head_t *)pointer; + freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT))) - 1; + + /* Now, possibly we can coalesce chunks together */ + while ( order < FREELIST_SIZE ) + { + mask = 1UL << (order + PAGE_SHIFT); + if ( (unsigned long)freed_ch & mask ) + { + to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask); + if(allocated_in_map(virt_to_pfn(to_merge_ch)) || + to_merge_ch->level != order) + break; + + /* Merge with predecessor */ + freed_ch = to_merge_ch; + } + else + { + to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask); + if(allocated_in_map(virt_to_pfn(to_merge_ch)) || + to_merge_ch->level != order) + break; + + /* Merge with successor */ + freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask) - 1; + } + + /* We are committed to merging, unlink the chunk */ + *(to_merge_ch->pprev) = to_merge_ch->next; + to_merge_ch->next->pprev = to_merge_ch->pprev; + + order++; + } + + /* Link the new chunk */ + freed_ch->level = order; + freed_ch->next = free_head[order]; + freed_ch->pprev = &free_head[order]; + freed_ct->level = order; + + freed_ch->next->pprev = &freed_ch->next; + free_head[order] = freed_ch; + +} + +int free_physical_pages(xen_pfn_t *mfns, int n) +{ + struct xen_memory_reservation reservation; + + set_xen_guest_handle(reservation.extent_start, mfns); + reservation.nr_extents = n; + reservation.extent_order = 0; + reservation.domid = DOMID_SELF; + return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); +} + +int map_frame_rw(unsigned long addr, unsigned long mfn) +{ + return do_map_frames(addr, &mfn, 1, 1, 1, DOMID_SELF, NULL, L1_PROT); +} + +#ifdef HAVE_LIBC +void *sbrk(ptrdiff_t increment) +{ + unsigned long old_brk = brk; + unsigned long new_brk = old_brk + increment; + + if (new_brk > heap_end) { + printk("Heap exhausted: %lx + %lx = %p > %p\n", + old_brk, + (unsigned long) increment, + (void *) new_brk, + (void *) heap_end); + return NULL; + } + + if (new_brk > heap_mapped) { + unsigned long n = (new_brk - heap_mapped + PAGE_SIZE - 1) / PAGE_SIZE; + + if ( !chk_free_pages(n) ) + { + printk("Memory exhausted: want %ld pages, but only %ld are left\n", + n, nr_free_pages); + return NULL; + } + do_map_zero(heap_mapped, n); + heap_mapped += n * PAGE_SIZE; + } + + brk = new_brk; + + return (void *) old_brk; +} +#endif + + + +void init_mm(void) +{ + + unsigned long start_pfn, max_pfn; + + printk("MM: Init\n"); + + get_max_pages(); + arch_init_mm(&start_pfn, &max_pfn); + /* + * now we can initialise the page allocator + */ + init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); + printk("MM: done\n"); + + arch_init_p2m(max_pfn); + + arch_init_demand_mapping_area(); + +#ifdef CONFIG_BALLOON + nr_mem_pages = max_pfn; +#endif +} + +void fini_mm(void) +{ +} + +void sanity_check(void) +{ + int x; + chunk_head_t *head; + + for (x = 0; x < FREELIST_SIZE; x++) { + for (head = free_head[x]; !FREELIST_EMPTY(head); head = head->next) { + ASSERT(!allocated_in_map(virt_to_pfn(head))); + if (head->next) + ASSERT(head->next->pprev == &head->next); + } + if (free_head[x]) { + ASSERT(free_head[x]->pprev == &free_head[x]); + } + } +} diff -Nru xen-4.9.0/extras/mini-os/netfront.c xen-4.9.2/extras/mini-os/netfront.c --- xen-4.9.0/extras/mini-os/netfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/netfront.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,667 @@ +/* Minimal network driver for Mini-OS. + * Copyright (c) 2006-2007 Jacob Gorm Hansen, University of Copenhagen. + * Based on netfront.c from Xen Linux. + * + * Does not handle fragments or extras.
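One implementation detail worth previewing: the driver below keeps its free TX request ids in a freelist threaded through a plain array, via the add_id_to_freelist()/get_id_from_freelist() helpers defined shortly. A minimal sketch of their behaviour (the 4-entry array and the demo function are illustrative):

    static void freelist_demo(void)
    {
        /* Slot 0 holds the head; slot id+1 holds the link for 'id'. */
        unsigned short fl[4 + 1] = { 0 };

        add_id_to_freelist(2, fl);          /* head is now 2 */
        add_id_to_freelist(0, fl);          /* head is now 0, which links to 2 */

        BUG_ON(get_id_from_freelist(fl) != 0);
        BUG_ON(get_id_from_freelist(fl) != 2);
    }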
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_WAIT_QUEUE_HEAD(netfront_queue); + +#ifdef HAVE_LIBC +#define NETIF_SELECT_RX ((void*)-1) +#endif + + + +#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) +#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) +#define GRANT_INVALID_REF 0 + + +struct net_buffer { + void* page; + grant_ref_t gref; +}; + +struct netfront_dev { + domid_t dom; + + unsigned short tx_freelist[NET_TX_RING_SIZE + 1]; + struct semaphore tx_sem; + + struct net_buffer rx_buffers[NET_RX_RING_SIZE]; + struct net_buffer tx_buffers[NET_TX_RING_SIZE]; + + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; + grant_ref_t tx_ring_ref; + grant_ref_t rx_ring_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + char *mac; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; + unsigned char *data; + size_t len; + size_t rlen; +#endif + + void (*netif_rx)(unsigned char* data, int len); +}; + +void init_rx_buffers(struct netfront_dev *dev); + +static inline void add_id_to_freelist(unsigned int id,unsigned short* freelist) +{ + freelist[id + 1] = freelist[0]; + freelist[0] = id; +} + +static inline unsigned short get_id_from_freelist(unsigned short* freelist) +{ + unsigned int id = freelist[0]; + freelist[0] = freelist[id + 1]; + return id; +} + +__attribute__((weak)) void netif_rx(unsigned char* data,int len) +{ + printk("%d bytes incoming at %p\n",len,data); +} + +__attribute__((weak)) void net_app_main(void*si,unsigned char*mac) {} + +static inline int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +void network_rx(struct netfront_dev *dev) +{ + RING_IDX rp,cons,req_prod; + int nr_consumed, more, i, notify; + int dobreak; + + nr_consumed = 0; +moretodo: + rp = dev->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + dobreak = 0; + for (cons = dev->rx.rsp_cons; cons != rp && !dobreak; nr_consumed++, cons++) + { + struct net_buffer* buf; + unsigned char* page; + int id; + + struct netif_rx_response *rx = RING_GET_RESPONSE(&dev->rx, cons); + + id = rx->id; + BUG_ON(id >= NET_RX_RING_SIZE); + + buf = &dev->rx_buffers[id]; + page = (unsigned char*)buf->page; + gnttab_end_access(buf->gref); + + if (rx->status > NETIF_RSP_NULL) + { +#ifdef HAVE_LIBC + if (dev->netif_rx == NETIF_SELECT_RX) { + int len = rx->status; + ASSERT(current == main_thread); + if (len > dev->len) + len = dev->len; + memcpy(dev->data, page+rx->offset, len); + dev->rlen = len; + /* No need to receive the rest for now */ + dobreak = 1; + } else +#endif + dev->netif_rx(page+rx->offset,rx->status); + } + } + dev->rx.rsp_cons=cons; + + RING_FINAL_CHECK_FOR_RESPONSES(&dev->rx,more); + if(more && !dobreak) goto moretodo; + + req_prod = dev->rx.req_prod_pvt; + + for(i=0; i<nr_consumed; i++) + { + int id = xennet_rxidx(req_prod + i); + netif_rx_request_t *req = RING_GET_REQUEST(&dev->rx, req_prod + i); + struct net_buffer* buf = &dev->rx_buffers[id]; + void* page = buf->page; + + /* We are sure to have free gnttab entries since they got released above */ + buf->gref = req->gref = + gnttab_grant_access(dev->dom,virt_to_mfn(page),0); + + req->id = id; + } + + wmb(); + + dev->rx.req_prod_pvt = req_prod + i; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->rx, notify); + if (notify) + notify_remote_via_evtchn(dev->evtchn); + +} + +void network_tx_buf_gc(struct netfront_dev *dev) +{ + + + RING_IDX cons, prod; + unsigned short id; + + do { + prod = dev->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'.
*/ + + for (cons = dev->tx.rsp_cons; cons != prod; cons++) + { + struct netif_tx_response *txrsp; + struct net_buffer *buf; + + txrsp = RING_GET_RESPONSE(&dev->tx, cons); + if (txrsp->status == NETIF_RSP_NULL) + continue; + + if (txrsp->status == NETIF_RSP_ERROR) + printk("packet error\n"); + + id = txrsp->id; + BUG_ON(id >= NET_TX_RING_SIZE); + buf = &dev->tx_buffers[id]; + gnttab_end_access(buf->gref); + buf->gref=GRANT_INVALID_REF; + + add_id_to_freelist(id,dev->tx_freelist); + up(&dev->tx_sem); + } + + dev->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. + * Note that it is essential to schedule a callback, no matter + * how few tx_buffers are pending. Even if there is space in the + * transmit ring, higher layers may be blocked because too much + * data is outstanding: in such cases notification from Xen is + * likely to be the only kick that we'll get. + */ + dev->tx.sring->rsp_event = + prod + ((dev->tx.sring->req_prod - prod) >> 1) + 1; + mb(); + } while ((cons == prod) && (prod != dev->tx.sring->rsp_prod)); + + +} + +void netfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + int flags; + struct netfront_dev *dev = data; + + local_irq_save(flags); + + network_tx_buf_gc(dev); + network_rx(dev); + + local_irq_restore(flags); +} + +#ifdef HAVE_LIBC +void netfront_select_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + int flags; + struct netfront_dev *dev = data; + int fd = dev->fd; + + local_irq_save(flags); + network_tx_buf_gc(dev); + local_irq_restore(flags); + + if (fd != -1) + files[fd].read = 1; + wake_up(&netfront_queue); +} +#endif + +static void free_netfront(struct netfront_dev *dev) +{ + int i; + + for(i=0;i<NET_TX_RING_SIZE;i++) + down(&dev->tx_sem); + + mask_evtchn(dev->evtchn); + + free(dev->mac); + free(dev->backend); + + gnttab_end_access(dev->rx_ring_ref); + gnttab_end_access(dev->tx_ring_ref); + + free_page(dev->rx.sring); + free_page(dev->tx.sring); + + unbind_evtchn(dev->evtchn); + + for(i=0;i<NET_RX_RING_SIZE;i++) { + gnttab_end_access(dev->rx_buffers[i].gref); + free_page(dev->rx_buffers[i].page); + } + + for(i=0;i<NET_TX_RING_SIZE;i++) + if (dev->tx_buffers[i].page) + free_page(dev->tx_buffers[i].page); + + free(dev->nodename); + free(dev); +} + +struct netfront_dev *init_netfront(char *_nodename, void (*thenetif_rx)(unsigned char* data, int len), unsigned char rawmac[6], char **ip) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct netif_tx_sring *txs; + struct netif_rx_sring *rxs; + int retry=0; + int i; + char* msg = NULL; + char nodename[256]; + char path[256]; + struct netfront_dev *dev; + static int netfrontends = 0; + + if (!_nodename) + snprintf(nodename, sizeof(nodename), "device/vif/%d", netfrontends); + else { + strncpy(nodename, _nodename, sizeof(nodename) - 1); + nodename[sizeof(nodename) - 1] = 0; + } + netfrontends++; + + if (!thenetif_rx) + thenetif_rx = netif_rx; + + printk("************************ NETFRONT for %s **********\n\n\n", nodename); + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + printk("net TX ring size %lu\n", (unsigned long) NET_TX_RING_SIZE); + printk("net RX ring size %lu\n", (unsigned long) NET_RX_RING_SIZE); + init_SEMAPHORE(&dev->tx_sem, NET_TX_RING_SIZE); + for(i=0;i<NET_TX_RING_SIZE;i++) { + add_id_to_freelist(i,dev->tx_freelist); + dev->tx_buffers[i].page = NULL; + } + + for(i=0;i<NET_RX_RING_SIZE;i++) { + dev->rx_buffers[i].page = (char*)alloc_page(); + } + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); +#ifdef HAVE_LIBC + if (thenetif_rx == NETIF_SELECT_RX) +
evtchn_alloc_unbound(dev->dom, netfront_select_handler, dev, &dev->evtchn); + else +#endif + evtchn_alloc_unbound(dev->dom, netfront_handler, dev, &dev->evtchn); + + txs = (struct netif_tx_sring *) alloc_page(); + rxs = (struct netif_rx_sring *) alloc_page(); + memset(txs,0,PAGE_SIZE); + memset(rxs,0,PAGE_SIZE); + + + SHARED_RING_INIT(txs); + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&dev->tx, txs, PAGE_SIZE); + FRONT_RING_INIT(&dev->rx, rxs, PAGE_SIZE); + + dev->tx_ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(txs),0); + dev->rx_ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(rxs),0); + + init_rx_buffers(dev); + + dev->netif_rx = thenetif_rx; + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "tx-ring-ref","%u", + dev->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "rx-ring-ref","%u", + dev->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, nodename, "request-rx-copy", "%u", 1); + + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + printk("completing transaction\n"); + goto again; + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + snprintf(path, sizeof(path), "%s/mac", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->mac); + + if ((dev->backend == NULL) || (dev->mac == NULL)) { + printk("%s: backend/mac failed\n", __func__); + goto error; + } + + printk("backend at %s\n",dev->backend); + printk("mac is %s\n",dev->mac); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/state") + 1]; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + if (ip) { + snprintf(path, sizeof(path), "%s/ip", dev->backend); + xenbus_read(XBT_NIL, path, ip); + } + } + + printk("**************************\n"); + + unmask_evtchn(dev->evtchn); + + /* Special conversion specifier 'hh' needed for __ia64__. Without + this mini-os panics with 'Unaligned reference'.
*/ + if (rawmac) + sscanf(dev->mac,"%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &rawmac[0], + &rawmac[1], + &rawmac[2], + &rawmac[3], + &rawmac[4], + &rawmac[5]); + + return dev; +error: + free(msg); + free(err); + free_netfront(dev); + return NULL; +} + +#ifdef HAVE_LIBC +int netfront_tap_open(char *nodename) { + struct netfront_dev *dev; + + dev = init_netfront(nodename, NETIF_SELECT_RX, NULL, NULL); + if (!dev) { + printk("TAP open failed\n"); + errno = EIO; + return -1; + } + dev->fd = alloc_fd(FTYPE_TAP); + printk("tap_open(%s) -> %d\n", nodename, dev->fd); + files[dev->fd].tap.dev = dev; + return dev->fd; +} +#endif + +void shutdown_netfront(struct netfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/request-rx-copy") + 1]; + + printk("close network: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/tx-ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/rx-ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/request-rx-copy", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_netfront(dev); +} + + +void init_rx_buffers(struct netfront_dev *dev) +{ + int i, requeue_idx; + netif_rx_request_t *req; + int notify; + + /* Rebuild the RX buffer freelist and the RX ring itself. 
*/ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) + { + struct net_buffer* buf = &dev->rx_buffers[requeue_idx]; + req = RING_GET_REQUEST(&dev->rx, requeue_idx); + + buf->gref = req->gref = + gnttab_grant_access(dev->dom,virt_to_mfn(buf->page),0); + + req->id = requeue_idx; + + requeue_idx++; + } + + dev->rx.req_prod_pvt = requeue_idx; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->rx, notify); + + if (notify) + notify_remote_via_evtchn(dev->evtchn); + + dev->rx.sring->rsp_event = dev->rx.rsp_cons + 1; +} + + +void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len) +{ + int flags; + struct netif_tx_request *tx; + RING_IDX i; + int notify; + unsigned short id; + struct net_buffer* buf; + void* page; + + BUG_ON(len > PAGE_SIZE); + + down(&dev->tx_sem); + + local_irq_save(flags); + id = get_id_from_freelist(dev->tx_freelist); + local_irq_restore(flags); + + buf = &dev->tx_buffers[id]; + page = buf->page; + if (!page) + page = buf->page = (char*) alloc_page(); + + i = dev->tx.req_prod_pvt; + tx = RING_GET_REQUEST(&dev->tx, i); + + memcpy(page,data,len); + + buf->gref = + tx->gref = gnttab_grant_access(dev->dom,virt_to_mfn(page),1); + + tx->offset=0; + tx->size = len; + tx->flags=0; + tx->id = id; + dev->tx.req_prod_pvt = i + 1; + + wmb(); + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->tx, notify); + + if(notify) notify_remote_via_evtchn(dev->evtchn); + + local_irq_save(flags); + network_tx_buf_gc(dev); + local_irq_restore(flags); +} + +#ifdef HAVE_LIBC +ssize_t netfront_receive(struct netfront_dev *dev, unsigned char *data, size_t len) +{ + unsigned long flags; + int fd = dev->fd; + ASSERT(current == main_thread); + + dev->rlen = 0; + dev->data = data; + dev->len = len; + + local_irq_save(flags); + network_rx(dev); + if (!dev->rlen && fd != -1) + /* No data for us, make select stop returning */ + files[fd].read = 0; + /* Before re-enabling the interrupts, in case a packet just arrived in the + * meanwhile. */ + local_irq_restore(flags); + + dev->data = NULL; + dev->len = 0; + + return dev->rlen; +} +#endif diff -Nru xen-4.9.0/extras/mini-os/pcifront.c xen-4.9.2/extras/mini-os/pcifront.c --- xen-4.9.0/extras/mini-os/pcifront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/pcifront.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,616 @@ +/* Minimal PCI driver for Mini-OS. + * Copyright (c) 2007-2008 Samuel Thibault. + * Based on blkfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +DECLARE_WAIT_QUEUE_HEAD(pcifront_queue); +static struct pcifront_dev *pcidev; + +struct pcifront_dev { + domid_t dom; + + struct xen_pci_sharedinfo *info; + grant_ref_t info_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; +}; + +void pcifront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + wake_up(&pcifront_queue); +} + +static void free_pcifront(struct pcifront_dev *dev) +{ + if (!dev) + dev = pcidev; + + mask_evtchn(dev->evtchn); + + gnttab_end_access(dev->info_ref); + free_page(dev->info); + + unbind_evtchn(dev->evtchn); + + free(dev->backend); + free(dev->nodename); + free(dev); +} + +void pcifront_watches(void *opaque) +{ + XenbusState state; + char *err = NULL, *msg = NULL; + char *be_path, *be_state; + char* nodename = opaque ? 
opaque : "device/pci/0"; + char path[strlen(nodename) + 9]; + char fe_state[strlen(nodename) + 7]; + xenbus_event_queue events = NULL; + + snprintf(path, sizeof(path), "%s/backend", nodename); + snprintf(fe_state, sizeof(fe_state), "%s/state", nodename); + + while (1) { + printk("pcifront_watches: waiting for backend path to appear %s\n", path); + xenbus_watch_path_token(XBT_NIL, path, path, &events); + while ((err = xenbus_read(XBT_NIL, path, &be_path)) != NULL) { + free(err); + xenbus_wait_for_watch(&events); + } + xenbus_unwatch_path_token(XBT_NIL, path, path); + printk("pcifront_watches: waiting for backend to get into the right state %s\n", be_path); + be_state = (char *) malloc(strlen(be_path) + 7); + snprintf(be_state, strlen(be_path) + 7, "%s/state", be_path); + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); + while ((err = xenbus_read(XBT_NIL, be_state, &msg)) != NULL || msg[0] > '4') { + free(msg); + free(err); + xenbus_wait_for_watch(&events); + } + xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); + if (init_pcifront(NULL) == NULL) { + free(be_state); + free(be_path); + continue; + } + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); + state = XenbusStateConnected; + printk("pcifront_watches: waiting for backend events %s\n", be_state); + while ((err = xenbus_wait_for_state_change(be_state, &state, &events)) == NULL && + (err = xenbus_read(XBT_NIL, pcidev->backend, &msg)) == NULL) { + free(msg); + printk("pcifront_watches: backend state changed: %s %d\n", be_state, state); + if (state == XenbusStateReconfiguring) { + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateReconfiguring); + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateReconfiguring)) != NULL) { + printk("pcifront_watches: error changing state to %d: %s\n", + XenbusStateReconfiguring, err); + if (!strcmp(err, "ENOENT")) { + xenbus_write(XBT_NIL, fe_state, "7"); + free(err); + } + } + } else if (state == XenbusStateReconfigured) { + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateConnected); + printk("pcifront_watches: changing state to %d\n", XenbusStateConnected); + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateConnected)) != NULL) { + printk("pcifront_watches: error changing state to %d: %s\n", + XenbusStateConnected, err); + if (!strcmp(err, "ENOENT")) { + xenbus_write(XBT_NIL, fe_state, "4"); + free(err); + } + } + } else if (state == XenbusStateClosing) + break; + } + if (err) { + printk("pcifront_watches: done waiting err=%s\n", err); + free(err); + } else + printk("pcifront_watches: done waiting\n"); + err = xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); + shutdown_pcifront(pcidev); + free(be_state); + free(be_path); + free(err); + pcidev = NULL; + } + + xenbus_unwatch_path_token(XBT_NIL, path, path); +} + +struct pcifront_dev *init_pcifront(char *_nodename) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + int retry=0; + char* msg = NULL; + char* nodename = _nodename ? 
_nodename : "device/pci/0"; + int dom; + + struct pcifront_dev *dev; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + if (!_nodename && pcidev) + return pcidev; + + printk("******************* PCIFRONT for %s **********\n\n\n", nodename); + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dom = xenbus_read_integer(path); + if (dom == -1) { + printk("no backend\n"); + return NULL; + } + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); + dev->dom = dom; + + evtchn_alloc_unbound(dev->dom, pcifront_handler, dev, &dev->evtchn); + + dev->info = (struct xen_pci_sharedinfo*) alloc_page(); + memset(dev->info,0,PAGE_SIZE); + + dev->info_ref = gnttab_grant_access(dev->dom,virt_to_mfn(dev->info),0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "pci-op-ref","%u", + dev->info_ref); + if (err) { + message = "writing pci-op-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "magic", XEN_PCI_MAGIC); + if (err) { + message = "writing magic"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateInitialised); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + char path[strlen(dev->backend) + strlen("/state") + 1]; + char frontpath[strlen(nodename) + strlen("/state") + 1]; + XenbusState state; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not avalable, state=%d\n", state); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + snprintf(frontpath, sizeof(frontpath), "%s/state", nodename); + if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) + != NULL) { + printk("error switching state %s\n", err); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("**************************\n"); + + if (!_nodename) + pcidev = dev; + + return dev; + +error: + free(msg); + free(err); + free_pcifront(dev); + return NULL; +} + +void pcifront_scan(struct pcifront_dev *dev, void (*func)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun)) +{ + char *path; + int i, n, len; + char *s, *msg = NULL; + unsigned int domain, bus, slot, fun; + + if (!dev) + dev = pcidev; + if (!dev) { + printk("pcifront_scan: device or bus\n"); + return; + } + + 
len = strlen(dev->backend) + 1 + 5 + 10 + 1; + path = (char *) malloc(len); + snprintf(path, len, "%s/num_devs", dev->backend); + n = xenbus_read_integer(path); + + for (i = 0; i < n; i++) { + snprintf(path, len, "%s/dev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + free(msg); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", &domain, &bus, &slot, &fun) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + if (func) + func(domain, bus, slot, fun); + } + free(path); +} + +void shutdown_pcifront(struct pcifront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1]; + + printk("close pci: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_pcifront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/info-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_pcifront(dev); +} + +int pcifront_physical_to_virtual (struct pcifront_dev *dev, + unsigned int *dom, + unsigned int *bus, + unsigned int *slot, + unsigned int *fun) +{ + /* FIXME: the buffer sizing is a little lazy here. 
10 extra bytes + should be enough to hold the paths we need to construct, even + if the number of devices is large */ + char path[strlen(dev->backend) + strlen("/num_devs") + 10 + 1]; + int i, n; + char *s, *msg = NULL; + unsigned int dom1, bus1, slot1, fun1; + + if (!dev) + dev = pcidev; + + snprintf(path, sizeof(path), "%s/num_devs", dev->backend); + n = xenbus_read_integer(path); + + for (i = 0; i < n; i++) { + snprintf(path, sizeof(path), "%s/dev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + free(msg); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", &dom1, &bus1, &slot1, &fun1) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + if (dom1 == *dom && bus1 == *bus && slot1 == *slot && fun1 == *fun) { + snprintf(path, sizeof(path), "%s/vdev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", dom, bus, slot, fun) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + return 0; + } + } + return -1; +} + +void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op) +{ + if (!dev) + dev = pcidev; + dev->info->op = *op; + /* Make sure info is written before the flag */ + wmb(); + set_bit(_XEN_PCIF_active, (void*) &dev->info->flags); + notify_remote_via_evtchn(dev->evtchn); + + wait_event(pcifront_queue, !test_bit(_XEN_PCIF_active, (void*) &dev->info->flags)); + + /* Make sure flag is read before info */ + rmb(); + *op = dev->info->op; +} + +int pcifront_conf_read(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int *val) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_conf_read; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.offset = off; + op.size = size; + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + + *val = op.value; + + return 0; +} + +int pcifront_conf_write(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int val) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_conf_write; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.offset = off; + op.size = size; + + op.value = val; + + pcifront_op(dev, &op); + + return op.err; +} + +int pcifront_enable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_enable_msi; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + else + return op.value; +} + +int pcifront_disable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int 
bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_disable_msi; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + return op.err; +} + +int pcifront_enable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + struct xen_msix_entry *entries, int n) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + if (n > SH_INFO_MAX_VEC) + return XEN_PCI_ERR_op_failed; + + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_enable_msix; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.value = n; + + memcpy(op.msix_entries, entries, n * sizeof(*entries)); + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + + memcpy(entries, op.msix_entries, n * sizeof(*entries)); + + return 0; +} + + +int pcifront_disable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_disable_msix; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + return op.err; +} diff -Nru xen-4.9.0/extras/mini-os/README xen-4.9.2/extras/mini-os/README --- xen-4.9.0/extras/mini-os/README 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/README 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,84 @@ + Minimal OS + ---------- + +This shows some of the stuff that any guest OS will have to set up. + +This includes: + + * installing a virtual exception table + * handling virtual exceptions + * handling asynchronous events + * enabling/disabling async events + * parsing start_info struct at start-of-day + * registering virtual interrupt handlers (for timer interrupts) + * a simple page and memory allocator + * minimal libc support + * minimal Copy-on-Write support + * network, block, framebuffer support + * transparent access to FileSystem exports (see tools/fs-back) + +- to build it just type make. + +- Mini-OS can be configured in various ways by specifying a config file: + + MINIOS_CONFIG=config-file make + + config-file can contain various CONFIG_* items set to either "y" or "n". + Their defaults can be found in Config.mk. + It is possible to specify the interface version of Xen via setting + + XEN_INTERFACE_VERSION= + + in the config file. This defaults to 0x00030205, which is the minimal + version supported. The latest available version is specified by setting + + XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__ + +- By typing + + make testbuild + + it is possible to test builds of various configurations. This should be + done always after modifying Mini-OS. + + The configurations which are build tested can be found in the directory + arch/*/testbuild with one file per configuration. Those configurations are + being built for each sub-architecture (e.g. x86_32 and x86_64 for the + x86 architecture). 
+ + Please update the current configuration files when adding a new CONFIG_ + item and maybe even add a new configuration file if the new item interacts + with other CONFIG_ items. + +- to build it with TCP/IP support, download LWIP 1.3.2 source code and type + + make LWIPDIR=/path/to/lwip/source + +- to build it with much better libc support, see the stubdom/ directory + +- to start it do the following in domain0 + # xl create -c domain_config + +This starts the kernel and prints out a bunch of stuff and then once every +second the system time. + +If you have setup a disk in the config file (e.g. +disk = [ 'file:/tmp/foo,hda,r' ] ), it will loop reading it. If that disk is +writable (e.g. disk = [ 'file:/tmp/foo,hda,w' ] ), it will write data patterns +and re-read them. + +If you have setup a network in the config file (e.g. vif = [''] ), it will +print incoming packets. + +If you have setup a VFB in the config file (e.g. vfb = ['type=sdl'] ), it will +show a mouse with which you can draw color squares. + +If you have compiled it with TCP/IP support, it will run a daytime server on +TCP port 13. + + + ARM notes + ========= + +- The IRQ numbers are currently hard-coded in gic.c and may need to be updated if + future versions of Xen change them. diff -Nru xen-4.9.0/extras/mini-os/sched.c xen-4.9.2/extras/mini-os/sched.c --- xen-4.9.0/extras/mini-os/sched.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/sched.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,256 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: sched.c + * Author: Grzegorz Milos + * Changes: Robert Kaiser + * + * Date: Aug 2005 + * + * Environment: Xen Minimal OS + * Description: simple scheduler for Mini-Os + * + * The scheduler is non-preemptive (cooperative), and schedules according + * to Round Robin algorithm. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef SCHED_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=sched.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
((void)0) +#endif + +MINIOS_TAILQ_HEAD(thread_list, struct thread); + +struct thread *idle_thread = NULL; +static struct thread_list exited_threads = MINIOS_TAILQ_HEAD_INITIALIZER(exited_threads); +static struct thread_list thread_list = MINIOS_TAILQ_HEAD_INITIALIZER(thread_list); +static int threads_started; + +struct thread *main_thread; + +void schedule(void) +{ + struct thread *prev, *next, *thread, *tmp; + unsigned long flags; + + if (irqs_disabled()) { + printk("Must not call schedule() with IRQs disabled\n"); + BUG(); + } + + prev = current; + local_irq_save(flags); + + if (in_callback) { + printk("Must not call schedule() from a callback\n"); + BUG(); + } + + do { + /* Examine all threads. + Find a runnable thread, but also wake up expired ones and find the + time when the next timeout expires, else use 10 seconds. */ + s_time_t now = NOW(); + s_time_t min_wakeup_time = now + SECONDS(10); + next = NULL; + MINIOS_TAILQ_FOREACH_SAFE(thread, &thread_list, thread_list, tmp) + { + if (!is_runnable(thread) && thread->wakeup_time != 0LL) + { + if (thread->wakeup_time <= now) + wake(thread); + else if (thread->wakeup_time < min_wakeup_time) + min_wakeup_time = thread->wakeup_time; + } + if(is_runnable(thread)) + { + next = thread; + /* Put this thread on the end of the list */ + MINIOS_TAILQ_REMOVE(&thread_list, thread, thread_list); + MINIOS_TAILQ_INSERT_TAIL(&thread_list, thread, thread_list); + break; + } + } + if (next) + break; + /* block until the next timeout expires, or for 10 secs, whichever comes first */ + block_domain(min_wakeup_time); + /* handle pending events if any */ + force_evtchn_callback(); + } while(1); + local_irq_restore(flags); + /* Interrupting the switch is equivalent to having the next thread + inturrupted at the return instruction. And therefore at safe point. */ + if(prev != next) switch_threads(prev, next); + + MINIOS_TAILQ_FOREACH_SAFE(thread, &exited_threads, thread_list, tmp) + { + if(thread != prev) + { + MINIOS_TAILQ_REMOVE(&exited_threads, thread, thread_list); + free_pages(thread->stack, STACK_SIZE_PAGE_ORDER); + xfree(thread); + } + } +} + +struct thread* create_thread(char *name, void (*function)(void *), void *data) +{ + struct thread *thread; + unsigned long flags; + /* Call architecture specific setup. 
*/ + thread = arch_create_thread(name, function, data); + /* Not runable, not exited, not sleeping */ + thread->flags = 0; + thread->wakeup_time = 0LL; +#ifdef HAVE_LIBC + _REENT_INIT_PTR((&thread->reent)) +#endif + set_runnable(thread); + local_irq_save(flags); + MINIOS_TAILQ_INSERT_TAIL(&thread_list, thread, thread_list); + local_irq_restore(flags); + return thread; +} + +#ifdef HAVE_LIBC +static struct _reent callback_reent; +struct _reent *__getreent(void) +{ + struct _reent *_reent; + + if (!threads_started) + _reent = _impure_ptr; + else if (in_callback) + _reent = &callback_reent; + else + _reent = &get_current()->reent; + +#ifndef NDEBUG +#if defined(__x86_64__) || defined(__x86__) + { +#ifdef __x86_64__ + register unsigned long sp asm ("rsp"); +#else + register unsigned long sp asm ("esp"); +#endif + if ((sp & (STACK_SIZE-1)) < STACK_SIZE / 16) { + static int overflowing; + if (!overflowing) { + overflowing = 1; + printk("stack overflow\n"); + BUG(); + } + } + } +#endif +#else +#error Not implemented yet +#endif + return _reent; +} +#endif + +void exit_thread(void) +{ + unsigned long flags; + struct thread *thread = current; + printk("Thread \"%s\" exited.\n", thread->name); + local_irq_save(flags); + /* Remove from the thread list */ + MINIOS_TAILQ_REMOVE(&thread_list, thread, thread_list); + clear_runnable(thread); + /* Put onto exited list */ + MINIOS_TAILQ_INSERT_HEAD(&exited_threads, thread, thread_list); + local_irq_restore(flags); + /* Schedule will free the resources */ + while(1) + { + schedule(); + printk("schedule() returned! Trying again\n"); + } +} + +void block(struct thread *thread) +{ + thread->wakeup_time = 0LL; + clear_runnable(thread); +} + +void msleep(uint32_t millisecs) +{ + struct thread *thread = get_current(); + thread->wakeup_time = NOW() + MILLISECS(millisecs); + clear_runnable(thread); + schedule(); +} + +void wake(struct thread *thread) +{ + thread->wakeup_time = 0LL; + set_runnable(thread); +} + +void idle_thread_fn(void *unused) +{ + threads_started = 1; + while (1) { + block(current); + schedule(); + } +} + +void init_sched(void) +{ + printk("Initialising scheduler\n"); + +#ifdef HAVE_LIBC + _REENT_INIT_PTR((&callback_reent)) +#endif + idle_thread = create_thread("Idle", idle_thread_fn, NULL); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/extras/mini-os/scripts/travis-build xen-4.9.2/extras/mini-os/scripts/travis-build --- xen-4.9.0/extras/mini-os/scripts/travis-build 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/scripts/travis-build 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,5 @@ +#!/bin/bash -ex + +$CC --version + +make testbuild diff -Nru xen-4.9.0/extras/mini-os/test.c xen-4.9.2/extras/mini-os/test.c --- xen-4.9.0/extras/mini-os/test.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/test.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,579 @@ +/****************************************************************************** + * test.c + * + * Test code for all the various frontends; split from kernel.c + * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * Copyright (c) 2006, Robert Kaiser, FH Wiesbaden + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including 
without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_XENBUS +static unsigned int do_shutdown = 0; +static unsigned int shutdown_reason; +static DECLARE_WAIT_QUEUE_HEAD(shutdown_queue); +#endif + +#ifdef CONFIG_XENBUS +void test_xenbus(void); + +static void xenbus_tester(void *p) +{ + test_xenbus(); +} +#endif + +#ifndef HAVE_LIBC +/* Should be random enough for our uses */ +int rand(void) +{ + static unsigned int previous; + struct timeval tv; + gettimeofday(&tv, NULL); + previous += tv.tv_sec + tv.tv_usec; + previous *= RAND_MIX; + return previous; +} +#endif + +static void periodic_thread(void *p) +{ + struct timeval tv; + printk("Periodic thread started.\n"); + for(;;) + { + gettimeofday(&tv, NULL); + printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); + msleep(1000); + } +} + +#ifdef CONFIG_NETFRONT +static struct netfront_dev *net_dev; +static struct semaphore net_sem = __SEMAPHORE_INITIALIZER(net_sem, 0); + +static void netfront_thread(void *p) +{ + net_dev = init_netfront(NULL, NULL, NULL, NULL); + up(&net_sem); +} +#endif + +#ifdef CONFIG_BLKFRONT +static struct blkfront_dev *blk_dev; +static struct blkfront_info blk_info; +static uint64_t blk_size_read; +static uint64_t blk_size_write; +static struct semaphore blk_sem = __SEMAPHORE_INITIALIZER(blk_sem, 0);; + +struct blk_req { + struct blkfront_aiocb aiocb; + int rand_value; + struct blk_req *next; +}; + +#ifdef BLKTEST_WRITE +static struct blk_req *blk_to_read; +#endif + +static struct blk_req *blk_alloc_req(uint64_t sector) +{ + struct blk_req *req = xmalloc(struct blk_req); + req->aiocb.aio_dev = blk_dev; + req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size); + req->aiocb.aio_nbytes = blk_info.sector_size; + req->aiocb.aio_offset = sector * blk_info.sector_size; + req->aiocb.data = req; + req->next = NULL; + return req; +} + +static void blk_read_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + if (ret) + printk("got error code %d when reading at offset %ld\n", ret, (long) aiocb->aio_offset); + else + blk_size_read += blk_info.sector_size; + free(aiocb->aio_buf); + free(req); +} + +static void blk_read_sector(uint64_t sector) +{ + struct blk_req *req; + + req = blk_alloc_req(sector); + req->aiocb.aio_cb = blk_read_completed; + + blkfront_aio_read(&req->aiocb); +} + +#ifdef BLKTEST_WRITE +static void blk_write_read_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + int rand_value; + int i; + int *buf; + + if (ret) { + 
printk("got error code %d when reading back at offset %ld\n", ret, aiocb->aio_offset); + free(aiocb->aio_buf); + free(req); + return; + } + blk_size_read += blk_info.sector_size; + buf = (int*) aiocb->aio_buf; + rand_value = req->rand_value; + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { + if (buf[i] != rand_value) { + printk("bogus data at offset %ld\n", aiocb->aio_offset + i); + break; + } + rand_value *= RAND_MIX; + } + free(aiocb->aio_buf); + free(req); +} + +static void blk_write_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + if (ret) { + printk("got error code %d when writing at offset %ld\n", ret, aiocb->aio_offset); + free(aiocb->aio_buf); + free(req); + return; + } + blk_size_write += blk_info.sector_size; + /* Push write check */ + req->next = blk_to_read; + blk_to_read = req; +} + +static void blk_write_sector(uint64_t sector) +{ + struct blk_req *req; + int rand_value; + int i; + int *buf; + + req = blk_alloc_req(sector); + req->aiocb.aio_cb = blk_write_completed; + req->rand_value = rand_value = rand(); + + buf = (int*) req->aiocb.aio_buf; + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { + buf[i] = rand_value; + rand_value *= RAND_MIX; + } + + blkfront_aio_write(&req->aiocb); +} +#endif + +static void blkfront_thread(void *p) +{ + time_t lasttime = 0; + + blk_dev = init_blkfront(NULL, &blk_info); + if (!blk_dev) { + up(&blk_sem); + return; + } + + if (blk_info.info & VDISK_CDROM) + printk("Block device is a CDROM\n"); + if (blk_info.info & VDISK_REMOVABLE) + printk("Block device is removable\n"); + if (blk_info.info & VDISK_READONLY) + printk("Block device is read-only\n"); + +#ifdef BLKTEST_WRITE + if (blk_info.mode == O_RDWR) { + blk_write_sector(0); + blk_write_sector(blk_info.sectors-1); + } else +#endif + { + blk_read_sector(0); + blk_read_sector(blk_info.sectors-1); + } + + while (!do_shutdown) { + uint64_t sector = rand() % blk_info.sectors; + struct timeval tv; +#ifdef BLKTEST_WRITE + if (blk_info.mode == O_RDWR) + blk_write_sector(sector); + else +#endif + blk_read_sector(sector); + blkfront_aio_poll(blk_dev); + gettimeofday(&tv, NULL); + if (tv.tv_sec > lasttime + 10) { + printk("%llu read, %llu write\n", + (unsigned long long) blk_size_read, + (unsigned long long) blk_size_write); + lasttime = tv.tv_sec; + } + +#ifdef BLKTEST_WRITE + while (blk_to_read) { + struct blk_req *req = blk_to_read; + blk_to_read = blk_to_read->next; + req->aiocb.aio_cb = blk_write_read_completed; + blkfront_aio_read(&req->aiocb); + } +#endif + } + up(&blk_sem); +} +#endif + +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) +#define WIDTH 800 +#define HEIGHT 600 +#define DEPTH 32 + +static uint32_t *fb; +static int refresh_period = 50; +static struct fbfront_dev *fb_dev; +static struct semaphore fbfront_sem = __SEMAPHORE_INITIALIZER(fbfront_sem, 0); + +static void fbfront_drawvert(int x, int y1, int y2, uint32_t color) +{ + int y; + if (x < 0) + return; + if (x >= WIDTH) + return; + if (y1 < 0) + y1 = 0; + if (y2 >= HEIGHT) + y2 = HEIGHT-1; + for (y = y1; y <= y2; y++) + fb[x + y*WIDTH] ^= color; +} + +static void fbfront_drawhoriz(int x1, int x2, int y, uint32_t color) +{ + int x; + if (y < 0) + return; + if (y >= HEIGHT) + return; + if (x1 < 0) + x1 = 0; + if (x2 >= WIDTH) + x2 = WIDTH-1; + for (x = x1; x <= x2; x++) + fb[x + y*WIDTH] ^= color; +} + +static void fbfront_thread(void *p) +{ + size_t line_length = WIDTH * (DEPTH / 8); + size_t memsize = HEIGHT * line_length; + unsigned long *mfns; + int i, n = 
(memsize + PAGE_SIZE-1) / PAGE_SIZE; + + memsize = n * PAGE_SIZE; + fb = _xmalloc(memsize, PAGE_SIZE); + memset(fb, 0, memsize); + mfns = xmalloc_array(unsigned long, n); + for (i = 0; i < n; i++) + mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE); + fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n); + xfree(mfns); + if (!fb_dev) { + xfree(fb); + } + up(&fbfront_sem); +} + +static void clip_cursor(int *x, int *y) +{ + if (*x < 0) + *x = 0; + if (*x >= WIDTH) + *x = WIDTH - 1; + if (*y < 0) + *y = 0; + if (*y >= HEIGHT) + *y = HEIGHT - 1; +} + +static void refresh_cursor(int new_x, int new_y) +{ + static int old_x = -1, old_y = -1; + + if (!refresh_period) + return; + + if (old_x != -1 && old_y != -1) { + fbfront_drawvert(old_x, old_y + 1, old_y + 8, 0xffffffff); + fbfront_drawhoriz(old_x + 1, old_x + 8, old_y, 0xffffffff); + fbfront_update(fb_dev, old_x, old_y, 9, 9); + } + old_x = new_x; + old_y = new_y; + fbfront_drawvert(new_x, new_y + 1, new_y + 8, 0xffffffff); + fbfront_drawhoriz(new_x + 1, new_x + 8, new_y, 0xffffffff); + fbfront_update(fb_dev, new_x, new_y, 9, 9); +} + +static struct kbdfront_dev *kbd_dev; +static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0); +static void kbdfront_thread(void *p) +{ + DEFINE_WAIT(w); + DEFINE_WAIT(w2); + DEFINE_WAIT(w3); + int x = WIDTH / 2, y = HEIGHT / 2, z = 0; + + kbd_dev = init_kbdfront(NULL, 1); + down(&fbfront_sem); + if (!kbd_dev) { + up(&kbd_sem); + return; + } + + refresh_cursor(x, y); + while (1) { + union xenkbd_in_event kbdevent; + union xenfb_in_event fbevent; + int sleep = 1; + + add_waiter(w, kbdfront_queue); + add_waiter(w2, fbfront_queue); + add_waiter(w3, shutdown_queue); + + rmb(); + if (do_shutdown) + break; + + while (kbdfront_receive(kbd_dev, &kbdevent, 1) != 0) { + sleep = 0; + switch(kbdevent.type) { + case XENKBD_TYPE_MOTION: + printk("motion x:%d y:%d z:%d\n", + kbdevent.motion.rel_x, + kbdevent.motion.rel_y, + kbdevent.motion.rel_z); + x += kbdevent.motion.rel_x; + y += kbdevent.motion.rel_y; + z += kbdevent.motion.rel_z; + clip_cursor(&x, &y); + refresh_cursor(x, y); + break; + case XENKBD_TYPE_POS: + printk("pos x:%d y:%d dz:%d\n", + kbdevent.pos.abs_x, + kbdevent.pos.abs_y, + kbdevent.pos.rel_z); + x = kbdevent.pos.abs_x; + y = kbdevent.pos.abs_y; + z = kbdevent.pos.rel_z; + clip_cursor(&x, &y); + refresh_cursor(x, y); + break; + case XENKBD_TYPE_KEY: + printk("key %d %s\n", + kbdevent.key.keycode, + kbdevent.key.pressed ? "pressed" : "released"); + if (kbdevent.key.keycode == BTN_LEFT) { + printk("mouse %s at (%d,%d,%d)\n", + kbdevent.key.pressed ? 
"clic" : "release", x, y, z); + if (kbdevent.key.pressed) { + uint32_t color = rand(); + fbfront_drawvert(x - 16, y - 16, y + 15, color); + fbfront_drawhoriz(x - 16, x + 15, y + 16, color); + fbfront_drawvert(x + 16, y - 15, y + 16, color); + fbfront_drawhoriz(x - 15, x + 16, y - 16, color); + fbfront_update(fb_dev, x - 16, y - 16, 33, 33); + } + } else if (kbdevent.key.keycode == KEY_Q) { + shutdown_reason = SHUTDOWN_poweroff; + wmb(); + do_shutdown = 1; + wmb(); + wake_up(&shutdown_queue); + } + break; + } + } + while (fbfront_receive(fb_dev, &fbevent, 1) != 0) { + sleep = 0; + switch(fbevent.type) { + case XENFB_TYPE_REFRESH_PERIOD: + refresh_period = fbevent.refresh_period.period; + printk("refresh period %d\n", refresh_period); + refresh_cursor(x, y); + break; + } + } + if (sleep) + schedule(); + remove_waiter(w3, shutdown_queue); + remove_waiter(w2, fbfront_queue); + remove_waiter(w, kbdfront_queue); + } + up(&kbd_sem); +} +#endif + +#ifdef CONFIG_PCIFRONT +static struct pcifront_dev *pci_dev; +static struct semaphore pci_sem = __SEMAPHORE_INITIALIZER(pci_sem, 0); + +static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun) +{ + unsigned int vendor, device, rev, class; + + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class); + + printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev); +} + +static void pcifront_thread(void *p) +{ + pcifront_watches(NULL); + pci_dev = init_pcifront(NULL); + if (!pci_dev) { + up(&pci_sem); + return; + } + printk("PCI devices:\n"); + pcifront_scan(pci_dev, print_pcidev); + up(&pci_sem); +} +#endif + +void shutdown_frontends(void) +{ +#ifdef CONFIG_NETFRONT + down(&net_sem); + if (net_dev) + shutdown_netfront(net_dev); +#endif + +#ifdef CONFIG_BLKFRONT + down(&blk_sem); + if (blk_dev) + shutdown_blkfront(blk_dev); +#endif + +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) + if (fb_dev) + shutdown_fbfront(fb_dev); + + down(&kbd_sem); + if (kbd_dev) + shutdown_kbdfront(kbd_dev); +#endif + +#ifdef CONFIG_PCIFRONT + down(&pci_sem); + if (pci_dev) + shutdown_pcifront(pci_dev); +#endif +} + +#ifdef CONFIG_XENBUS +void app_shutdown(unsigned reason) +{ + shutdown_reason = reason; + wmb(); + do_shutdown = 1; + wmb(); + wake_up(&shutdown_queue); +} + +static void shutdown_thread(void *p) +{ + DEFINE_WAIT(w); + + while (1) { + add_waiter(w, shutdown_queue); + rmb(); + if (do_shutdown) { + rmb(); + break; + } + schedule(); + remove_waiter(w, shutdown_queue); + } + + shutdown_frontends(); + + HYPERVISOR_shutdown(shutdown_reason); +} +#endif + +int app_main(void *p) +{ + printk("Test main: par=%p\n", p); +#ifdef CONFIG_XENBUS + create_thread("xenbus_tester", xenbus_tester, p); +#endif + create_thread("periodic_thread", periodic_thread, p); +#ifdef CONFIG_NETFRONT + create_thread("netfront", netfront_thread, p); +#endif +#ifdef CONFIG_BLKFRONT + create_thread("blkfront", blkfront_thread, p); +#endif +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) + create_thread("fbfront", fbfront_thread, p); + create_thread("kbdfront", kbdfront_thread, p); +#endif +#ifdef CONFIG_PCIFRONT + create_thread("pcifront", pcifront_thread, p); +#endif +#ifdef CONFIG_XENBUS + create_thread("shutdown", shutdown_thread, p); +#endif + return 0; +} diff 
-Nru xen-4.9.0/extras/mini-os/tpmback.c xen-4.9.2/extras/mini-os/tpmback.c --- xen-4.9.0/extras/mini-os/tpmback.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/tpmback.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1132 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/xen/tpmback/tpmback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netback/netback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This code has also been derived from drivers/xen/tpmback/xenbus.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2005 IBM Corporation + * Copyright (C) 2005 Rusty Russell + * + * This code has also been derived from drivers/xen/tpmback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself also derived from drvivers/xen/netback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +//#define TPMBACK_PRINT_DEBUG +#ifdef TPMBACK_PRINT_DEBUG +#define TPMBACK_DEBUG(fmt,...) printk("Tpmback:Debug("__FILE__":%d) " fmt, __LINE__, ##__VA_ARGS__) +#define TPMBACK_DEBUG_MORE(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define TPMBACK_DEBUG(fmt,...) +#endif +#define TPMBACK_ERR(fmt,...) printk("Tpmback:Error " fmt, ##__VA_ARGS__) +#define TPMBACK_LOG(fmt,...) printk("Tpmback:Info " fmt, ##__VA_ARGS__) + +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) + +/* Default size of the tpmif array at initialization */ +#define DEF_ARRAY_SIZE 1 + +/* tpmif and tpmdev flags */ +#define TPMIF_CLOSED 1 +#define TPMIF_REQ_READY 2 + +struct tpmif { + domid_t domid; + unsigned int handle; + + char* fe_path; + char* fe_state_path; + + /* Locally bound event channel*/ + evtchn_port_t evtchn; + + /* Shared page */ + tpmif_shared_page_t *page; + + enum xenbus_state state; + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + + unsigned char uuid[16]; + void* opaque; + + /* state flags */ + int flags; +}; +typedef struct tpmif tpmif_t; + +struct tpmback_dev { + + tpmif_t** tpmlist; + unsigned long num_tpms; + unsigned long num_alloc; + + struct gntmap map; + + /* True if at least one tpmif has a request to be handled */ + int flags; + + xenbus_event_queue events; + + /* Callbacks */ + void (*open_callback)(domid_t, unsigned int); + void (*close_callback)(domid_t, unsigned int); +}; +typedef struct tpmback_dev tpmback_dev_t; + +enum { EV_NONE, EV_NEWFE, EV_STCHNG } tpm_ev_enum; + +/* Global objects */ +static struct thread* eventthread = NULL; +static tpmback_dev_t gtpmdev = { + .tpmlist = NULL, + .num_tpms = 0, + .num_alloc = 0, + .flags = TPMIF_CLOSED, + .events = NULL, + .open_callback = NULL, + .close_callback = NULL, +}; +struct wait_queue_head waitq; +int globalinit = 0; + +/************************************ + * TPMIF SORTED ARRAY FUNCTIONS + * tpmback_dev_t.tpmlist is a sorted array, sorted by domid and then handle number + * Duplicates are not allowed + * **********************************/ + +static void tpmif_req_ready(tpmif_t* tpmif) { + tpmif->flags |= TPMIF_REQ_READY; + gtpmdev.flags |= TPMIF_REQ_READY; +} + +static void tpmdev_check_req(void) { + int i; + int flags; + local_irq_save(flags); + for(i = 0; i < gtpmdev.num_tpms; ++i) { + if(gtpmdev.tpmlist[i]->flags & TPMIF_REQ_READY) { + gtpmdev.flags |= TPMIF_REQ_READY; + local_irq_restore(flags); + return; + } + } + gtpmdev.flags &= ~TPMIF_REQ_READY; + local_irq_restore(flags); +} + +static void tpmif_req_finished(tpmif_t* tpmif) { + tpmif->flags &= ~TPMIF_REQ_READY; + tpmdev_check_req(); +} + +int __get_tpmif_index(int st, int n, domid_t domid, unsigned int handle) +{ + int i = st + n /2; + tpmif_t* tmp; + + if( n <= 0 ) + return -1; + + tmp = gtpmdev.tpmlist[i]; + if(domid == tmp->domid && tmp->handle == handle) { + return i; + } else if ( (domid < tmp->domid) || + (domid == tmp->domid && handle < tmp->handle)) { + return __get_tpmif_index(st, n/2, domid, handle); + } else { + return __get_tpmif_index(i + 1, n/2 - ((n +1) % 2), domid, handle); + } +} + +/* Returns the array index of the tpmif domid/handle. Returns -1 if no such tpmif exists */ +int get_tpmif_index(domid_t domid, unsigned int handle) +{ + int flags; + int index; + local_irq_save(flags); + index = __get_tpmif_index(0, gtpmdev.num_tpms, domid, handle); + local_irq_restore(flags); + return index; +} + +/* Returns the tpmif domid/handle or NULL if none exists */ +tpmif_t* get_tpmif(domid_t domid, unsigned int handle) +{ + int flags; + int i; + tpmif_t* ret; + local_irq_save(flags); + i = get_tpmif_index(domid, handle); + if (i < 0) { + ret = NULL; + } else { + ret = gtpmdev.tpmlist[i]; + } + local_irq_restore(flags); + return ret; +} + +/* Remove the given tpmif. 
Returns 0 if it was removed, -1 if it was not removed */ +int remove_tpmif(tpmif_t* tpmif) +{ + int i, j; + char* err; + int flags; + local_irq_save(flags); + + /* Find the index in the array if it exists */ + i = get_tpmif_index(tpmif->domid, tpmif->handle); + if (i < 0) { + goto error; + } + + /* Remove the interface from the list */ + for(j = i; j < gtpmdev.num_tpms - 1; ++j) { + gtpmdev.tpmlist[j] = gtpmdev.tpmlist[j+1]; + } + gtpmdev.tpmlist[j] = NULL; + --gtpmdev.num_tpms; + + /* If removed tpm was the only ready tpm, then we need to check and turn off the ready flag */ + tpmdev_check_req(); + + local_irq_restore(flags); + + /* Stop listening for events on this tpm interface */ + if((err = xenbus_unwatch_path_token(XBT_NIL, tpmif->fe_state_path, tpmif->fe_state_path))) { + TPMBACK_ERR("Unable to unwatch path token `%s' Error was %s Ignoring..\n", tpmif->fe_state_path, err); + free(err); + } + + return 0; +error: + local_irq_restore(flags); + return -1; +} + +/* Insert tpmif into dev->tpmlist. Returns 0 on success and non zero on error. + * It is an error to insert a tpmif with the same domid and handle + * number + * as something already in the list */ +int insert_tpmif(tpmif_t* tpmif) +{ + int flags; + unsigned int i, j; + tpmif_t* tmp; + char* err; + char path[512]; + + local_irq_save(flags); + + /*Check if we need to allocate more space */ + if (gtpmdev.num_tpms == gtpmdev.num_alloc) { + gtpmdev.num_alloc *= 2; + gtpmdev.tpmlist = realloc(gtpmdev.tpmlist, gtpmdev.num_alloc); + } + + /*Find where to put the new interface */ + for(i = 0; i < gtpmdev.num_tpms; ++i) + { + tmp = gtpmdev.tpmlist[i]; + if(tpmif->domid == tmp->domid && tpmif->handle == tmp->handle) { + TPMBACK_ERR("Tried to insert duplicate tpm interface %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + if((tpmif->domid < tmp->domid) || + (tpmif->domid == tmp->domid && tpmif->handle < tmp->handle)) { + break; + } + } + + /*Shift all the tpm pointers past i down one */ + for(j = gtpmdev.num_tpms; j > i; --j) { + gtpmdev.tpmlist[j] = gtpmdev.tpmlist[j-1]; + } + + /*Add the new interface */ + gtpmdev.tpmlist[i] = tpmif; + ++gtpmdev.num_tpms; + + /*Should not be needed, anything inserted with ready flag is probably an error */ + tpmdev_check_req(); + + local_irq_restore(flags); + + snprintf(path, 512, "backend/vtpm/%u/%u/feature-protocol-v2", (unsigned int) tpmif->domid, tpmif->handle); + if ((err = xenbus_write(XBT_NIL, path, "1"))) + { + /* if we got an error here we should carefully remove the interface and then return */ + TPMBACK_ERR("Unable to write feature-protocol-v2 node: %s\n", err); + free(err); + remove_tpmif(tpmif); + goto error_post_irq; + } + + /*Listen for state changes on the new interface */ + if((err = xenbus_watch_path_token(XBT_NIL, tpmif->fe_state_path, tpmif->fe_state_path, >pmdev.events))) + { + /* if we got an error here we should carefully remove the interface and then return */ + TPMBACK_ERR("Unable to watch path token `%s' Error was %s\n", tpmif->fe_state_path, err); + free(err); + remove_tpmif(tpmif); + goto error_post_irq; + } + return 0; +error: + local_irq_restore(flags); +error_post_irq: + return -1; +} + + +/***************** + * CHANGE BACKEND STATE + * *****************/ +/*Attempts to change the backend state in xenstore + * returns 0 on success and non-zero on error */ +int tpmif_change_state(tpmif_t* tpmif, enum xenbus_state state) +{ + int tempst; + char path[512]; + char *value; + char *err; + enum xenbus_state readst; + TPMBACK_DEBUG("Backend state change 
%u/%u from=%d to=%d\n", (unsigned int) tpmif->domid, tpmif->handle, tpmif->state, state); + if (tpmif->state == state) + return 0; + + snprintf(path, 512, "backend/vtpm/%u/%u/state", (unsigned int) tpmif->domid, tpmif->handle); + + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Unable to read backend state %s, error was %s\n", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &tempst) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + readst = (enum xenbus_state) tempst; + free(value); + + /* It's possible that the backend state got updated by hotplug or something else behind our back */ + if(readst != tpmif->state) { + TPMBACK_DEBUG("tpm interface state was %d but xenstore state was %d!\n", tpmif->state, readst); + tpmif->state = readst; + } + + /*If if the state isnt changing, then we dont update xenstore b/c we dont want to fire extraneous events */ + if(tpmif->state == state) { + return 0; + } + + /*update xenstore*/ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_printf(XBT_NIL, path, "state", "%u", state))) { + TPMBACK_ERR("Error writing to xenstore %s, error was %s new state=%d\n", path, err, state); + free(err); + return -1; + } + + tpmif->state = state; + + return 0; +} +/********************************** + * TPMIF CREATION AND DELETION + * *******************************/ +static tpmif_t* __init_tpmif(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + tpmif = malloc(sizeof(*tpmif)); + tpmif->domid = domid; + tpmif->handle = handle; + tpmif->fe_path = NULL; + tpmif->fe_state_path = NULL; + tpmif->state = XenbusStateInitialising; + tpmif->status = DISCONNECTED; + tpmif->page = NULL; + tpmif->flags = 0; + tpmif->opaque = NULL; + memset(tpmif->uuid, 0, sizeof(tpmif->uuid)); + return tpmif; +} + +void __free_tpmif(tpmif_t* tpmif) +{ + if(tpmif->fe_path) { + free(tpmif->fe_path); + } + if(tpmif->fe_state_path) { + free(tpmif->fe_state_path); + } + free(tpmif); +} +/* Creates a new tpm interface, adds it to the sorted array and returns it. 
+ * returns NULL on error + * If the tpm interface already exists, it is returned*/ +tpmif_t* new_tpmif(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + char* err; + char path[512]; + + /* Make sure we haven't already created this tpm + * Double events can occur */ + if((tpmif = get_tpmif(domid, handle)) != NULL) { + return tpmif; + } + + tpmif = __init_tpmif(domid, handle); + + /* Get the uuid from xenstore */ + snprintf(path, 512, "backend/vtpm/%u/%u/uuid", (unsigned int) domid, handle); + if((!xenbus_read_uuid(path, tpmif->uuid))) { + TPMBACK_ERR("Error reading %s\n", path); + goto error; + } + + if(tpmif_change_state(tpmif, XenbusStateInitWait)) { + goto error; + } + + snprintf(path, 512, "backend/vtpm/%u/%u/frontend", (unsigned int) domid, handle); + if((err = xenbus_read(XBT_NIL, path, &tpmif->fe_path))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s), Error = %s", path, err); + free(err); + goto error; + } + + /*Set the state path */ + tpmif->fe_state_path = malloc(strlen(tpmif->fe_path) + 7); + strcpy(tpmif->fe_state_path, tpmif->fe_path); + strcat(tpmif->fe_state_path, "/state"); + + if(insert_tpmif(tpmif)) { + goto error; + } + TPMBACK_DEBUG("New tpmif %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + /* Do the callback now */ + if(gtpmdev.open_callback) { + gtpmdev.open_callback(tpmif->domid, tpmif->handle); + } + return tpmif; +error: + __free_tpmif(tpmif); + return NULL; + +} + +/* Removes tpmif from dev->tpmlist and frees it's memory usage */ +void free_tpmif(tpmif_t* tpmif) +{ + char* err; + char path[512]; + TPMBACK_DEBUG("Free tpmif %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + if(tpmif->flags & TPMIF_CLOSED) { + TPMBACK_ERR("Tried to free an instance twice! Theres a bug somewhere!\n"); + BUG(); + } + tpmif->flags = TPMIF_CLOSED; + + tpmif_change_state(tpmif, XenbusStateClosing); + + /* Unmap share page and unbind event channel */ + if(tpmif->status == CONNECTED) { + tpmif->status = DISCONNECTING; + mask_evtchn(tpmif->evtchn); + + if(gntmap_munmap(>pmdev.map, (unsigned long)tpmif->page, 1)) { + TPMBACK_ERR("%u/%u Error occured while trying to unmap shared page\n", (unsigned int) tpmif->domid, tpmif->handle); + } + + unbind_evtchn(tpmif->evtchn); + } + tpmif->status = DISCONNECTED; + tpmif_change_state(tpmif, XenbusStateClosed); + + /* Do the callback now */ + if(gtpmdev.close_callback) { + gtpmdev.close_callback(tpmif->domid, tpmif->handle); + } + + /* remove from array */ + remove_tpmif(tpmif); + + /* Wake up anyone possibly waiting on this interface and let them exit */ + wake_up(&waitq); + schedule(); + + /* Remove the old xenbus entries */ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_rm(XBT_NIL, path))) { + TPMBACK_ERR("Error cleaning up xenbus entries path=%s error=%s\n", path, err); + free(err); + } + + TPMBACK_LOG("Frontend %u/%u disconnected\n", (unsigned int) tpmif->domid, tpmif->handle); + + /* free memory */ + __free_tpmif(tpmif); + +} + +/********************** + * REMAINING TPMBACK FUNCTIONS + * ********************/ + +/*Event channel handler */ +void tpmback_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + tpmif_t* tpmif = (tpmif_t*) data; + tpmif_shared_page_t *pg = tpmif->page; + + switch (pg->state) + { + case TPMIF_STATE_SUBMIT: + TPMBACK_DEBUG("EVENT CHANNEL FIRE %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + tpmif_req_ready(tpmif); + wake_up(&waitq); + break; + case TPMIF_STATE_CANCEL: + /* If we are busy with a 
request, do nothing */ + if (tpmif->flags & TPMIF_REQ_READY) + return; + /* Acknowledge the cancellation if we are idle */ + pg->state = TPMIF_STATE_IDLE; + wmb(); + notify_remote_via_evtchn(tpmif->evtchn); + return; + default: + /* Spurious wakeup; do nothing */ + return; + } +} + +/* Connect to frontend */ +int connect_fe(tpmif_t* tpmif) +{ + char path[512]; + char* err, *value; + uint32_t domid; + grant_ref_t ringref; + evtchn_port_t evtchn; + + /* If already connected then quit */ + if (tpmif->status == CONNECTED) { + TPMBACK_DEBUG("%u/%u tried to connect while it was already connected?\n", (unsigned int) tpmif->domid, tpmif->handle); + return 0; + } + + /* Fetch the grant reference */ + snprintf(path, 512, "%s/ring-ref", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s) Error = %s", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &ringref) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + free(value); + + + /* Fetch the event channel*/ + snprintf(path, 512, "%s/event-channel", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s) Error = %s", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &evtchn) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + free(value); + + /* Check that protocol v2 is being used */ + snprintf(path, 512, "%s/feature-protocol-v2", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Unable to read %s during tpmback initialization! error = %s\n", path, err); + free(err); + return -1; + } + if(strcmp(value, "1")) { + TPMBACK_ERR("%s has an invalid value (%s)\n", path, value); + free(value); + return -1; + } + free(value); + + domid = tpmif->domid; + if((tpmif->page = gntmap_map_grant_refs(>pmdev.map, 1, &domid, 0, &ringref, PROT_READ | PROT_WRITE)) == NULL) { + TPMBACK_ERR("Failed to map grant reference %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + return -1; + } + + /*Bind the event channel */ + if((evtchn_bind_interdomain(tpmif->domid, evtchn, tpmback_handler, tpmif, &tpmif->evtchn))) + { + TPMBACK_ERR("%u/%u Unable to bind to interdomain event channel!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error_post_map; + } + unmask_evtchn(tpmif->evtchn); + + /* Write the ready flag and change status to connected */ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_printf(XBT_NIL, path, "ready", "%u", 1))) { + TPMBACK_ERR("%u/%u Unable to write ready flag on connect_fe()\n", (unsigned int) tpmif->domid, tpmif->handle); + free(err); + goto error_post_evtchn; + } + tpmif->status = CONNECTED; + if((tpmif_change_state(tpmif, XenbusStateConnected))){ + goto error_post_evtchn; + } + + TPMBACK_LOG("Frontend %u/%u connected\n", (unsigned int) tpmif->domid, tpmif->handle); + + return 0; +error_post_evtchn: + mask_evtchn(tpmif->evtchn); + unbind_evtchn(tpmif->evtchn); +error_post_map: + gntmap_munmap(>pmdev.map, (unsigned long)tpmif->page, 1); + return -1; +} + +static void disconnect_fe(tpmif_t* tpmif) +{ + if (tpmif->status == CONNECTED) { + tpmif->status = DISCONNECTING; + mask_evtchn(tpmif->evtchn); + + if(gntmap_munmap(>pmdev.map, (unsigned long)tpmif->page, 1)) { + TPMBACK_ERR("%u/%u Error occured while trying to unmap shared page\n", (unsigned int) 
tpmif->domid, tpmif->handle); + } + + unbind_evtchn(tpmif->evtchn); + } + tpmif->status = DISCONNECTED; + tpmif_change_state(tpmif, XenbusStateInitWait); + + TPMBACK_LOG("Frontend %u/%u disconnected\n", (unsigned int) tpmif->domid, tpmif->handle); +} + +static int frontend_changed(tpmif_t* tpmif) +{ + int state = xenbus_read_integer(tpmif->fe_state_path); + if(state < 0) { + state = XenbusStateUnknown; + } + + TPMBACK_DEBUG("Frontend %u/%u state changed to %d\n", (unsigned int) tpmif->domid, tpmif->handle, state); + + switch (state) { + case XenbusStateInitialising: + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + if(connect_fe(tpmif)) { + TPMBACK_ERR("Failed to connect to front end %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + tpmif_change_state(tpmif, XenbusStateClosed); + return -1; + } + break; + + case XenbusStateClosing: + tpmif_change_state(tpmif, XenbusStateClosing); + break; + + case XenbusStateClosed: + disconnect_fe(tpmif); + break; + + case XenbusStateUnknown: /* keep it here */ + free_tpmif(tpmif); + break; + + default: + TPMBACK_DEBUG("BAD STATE CHANGE %u/%u state = %d for tpmif\n", (unsigned int) tpmif->domid, tpmif->handle, state); + return -1; + } + return 0; +} + + +/* parses the string that comes out of xenbus_watch_wait_return. */ +static int parse_eventstr(const char* evstr, domid_t* domid, unsigned int* handle) +{ + int ret; + char cmd[40]; + char* err; + char* value; + unsigned int udomid = 0; + tpmif_t* tpmif; + /* First check for new frontends, this occurs when /backend/vtpm// gets created. Note we what the sscanf to fail on the last %s */ + if (sscanf(evstr, "backend/vtpm/%u/%u/%40s", &udomid, handle, cmd) == 2) { + *domid = udomid; + /* Make sure the entry exists, if this event triggers because the entry dissapeared then ignore it */ + if((err = xenbus_read(XBT_NIL, evstr, &value))) { + free(err); + return EV_NONE; + } + free(value); + /* Make sure the tpmif entry does not already exist, this should not happen */ + if((tpmif = get_tpmif(*domid, *handle)) != NULL) { + TPMBACK_DEBUG("Duplicate tpm entries! %u %u\n", tpmif->domid, tpmif->handle); + return EV_NONE; + } + return EV_NEWFE; + } else if((ret = sscanf(evstr, "/local/domain/%u/device/vtpm/%u/%40s", &udomid, handle, cmd)) == 3) { + *domid = udomid; + if (!strcmp(cmd, "state")) + return EV_STCHNG; + } + return EV_NONE; +} + +void handle_backend_event(char* evstr) { + tpmif_t* tpmif; + domid_t domid; + unsigned int handle; + int event; + + TPMBACK_DEBUG("Xenbus Event: %s\n", evstr); + + event = parse_eventstr(evstr, &domid, &handle); + + switch(event) { + case EV_NEWFE: + if(new_tpmif(domid, handle) == NULL) { + TPMBACK_ERR("Failed to create new tpm instance %u/%u\n", (unsigned int) domid, handle); + } + wake_up(&waitq); + break; + case EV_STCHNG: + if((tpmif = get_tpmif(domid, handle))) { + frontend_changed(tpmif); + } else { + TPMBACK_DEBUG("Event Received for non-existant tpm! instance=%u/%u xenbus_event=%s\n", (unsigned int) domid, handle, evstr); + } + break; + } +} + +/* Runs through the given path and creates events recursively + * for all of its children. 
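The classification rule in parse_eventstr() above is easy to misread, so here is a standalone restatement of it in plain libc, runnable as-is. The sample watch strings are hypothetical, and %39s is used below because the original's %40s into a 40-byte buffer leaves no room for the terminator.

#include <stdio.h>
#include <string.h>

static const char* classify(const char* evstr)
{
    unsigned int domid, handle;
    char cmd[40];

    /* New frontend: "backend/vtpm/<domid>/<handle>" with nothing after
     * the handle, i.e. the trailing %s conversion must fail. */
    if (sscanf(evstr, "backend/vtpm/%u/%u/%39s", &domid, &handle, cmd) == 2)
        return "EV_NEWFE";
    /* Frontend state change: a write to ".../device/vtpm/<handle>/state". */
    if (sscanf(evstr, "/local/domain/%u/device/vtpm/%u/%39s",
               &domid, &handle, cmd) == 3 && !strcmp(cmd, "state"))
        return "EV_STCHNG";
    return "EV_NONE";
}

int main(void)
{
    printf("%s\n", classify("backend/vtpm/1/0"));                    /* EV_NEWFE */
    printf("%s\n", classify("/local/domain/1/device/vtpm/0/state")); /* EV_STCHNG */
    printf("%s\n", classify("backend/vtpm/1/0/uuid"));               /* EV_NONE */
    return 0;
}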
+ * @path - xenstore path to scan */ +static void generate_backend_events(const char* path) +{ + char* err; + int i, len; + char **dirs; + char *entry; + + if((err = xenbus_ls(XBT_NIL, path, &dirs)) != NULL) { + free(err); + return; + } + + for(i = 0; dirs[i] != NULL; ++i) { + len = strlen(path) + strlen(dirs[i]) + 2; + entry = malloc(len); + snprintf(entry, len, "%s/%s", path, dirs[i]); + + /* Generate and handle event for the entry itself */ + handle_backend_event(entry); + + /* Do children */ + generate_backend_events(entry); + + /* Cleanup */ + free(entry); + free(dirs[i]); + } + free(dirs); + return; +} + +void* tpmback_get_opaque(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_opaque() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return NULL; + } + + return tpmif->opaque; +} + +int tpmback_set_opaque(domid_t domid, unsigned int handle, void *opaque) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("set_opaque() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return -1; + } + + tpmif->opaque = opaque; + return 0; +} + +unsigned char* tpmback_get_uuid(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_uuid() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return NULL; + } + + return tpmif->uuid; +} + +int tpmback_get_peercontext(domid_t domid, unsigned int handle, void* buffer, int buflen) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_uuid() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return -1; + } + + return evtchn_get_peercontext(tpmif->evtchn, buffer, buflen); +} + +static void event_listener(void) +{ + const char* bepath = "backend/vtpm"; + char **path; + char* err; + + /* Setup the backend device watch */ + if((err = xenbus_watch_path_token(XBT_NIL, bepath, bepath, >pmdev.events)) != NULL) { + TPMBACK_ERR("xenbus_watch_path_token(%s) failed with error %s!\n", bepath, err); + free(err); + goto egress; + } + + /* Check for any frontends that connected before we set the watch. + * This is almost guaranteed to happen if both domains are started + * immediatly one after the other. + * We do this by manually generating events on everything in the backend + * path */ + generate_backend_events(bepath); + + /* Wait and listen for changes in frontend connections */ + while(1) { + path = xenbus_wait_for_watch_return(>pmdev.events); + + /*If quit flag was set then exit */ + if(gtpmdev.flags & TPMIF_CLOSED) { + TPMBACK_DEBUG("listener thread got quit event. 
Exiting..\n"); + free(path); + break; + } + handle_backend_event(*path); + free(path); + + } + + if((err = xenbus_unwatch_path_token(XBT_NIL, bepath, bepath)) != NULL) { + free(err); + } +egress: + return; +} + +void event_thread(void* p) { + event_listener(); +} + +void init_tpmback(void (*open_cb)(domid_t, unsigned int), void (*close_cb)(domid_t, unsigned int)) +{ + if(!globalinit) { + init_waitqueue_head(&waitq); + globalinit = 1; + } + printk("============= Init TPM BACK ================\n"); + gtpmdev.tpmlist = malloc(sizeof(tpmif_t*) * DEF_ARRAY_SIZE); + gtpmdev.num_alloc = DEF_ARRAY_SIZE; + gtpmdev.num_tpms = 0; + gtpmdev.flags = 0; + + gtpmdev.open_callback = open_cb; + gtpmdev.close_callback = close_cb; + + eventthread = create_thread("tpmback-listener", event_thread, NULL); + +} + +void shutdown_tpmback(void) +{ + TPMBACK_LOG("Shutting down tpm backend\n"); + /* Set the quit flag */ + gtpmdev.flags = TPMIF_CLOSED; + + //printk("num tpms is %d\n", gtpmdev.num_tpms); + /*Free all backend instances */ + while(gtpmdev.num_tpms) { + free_tpmif(gtpmdev.tpmlist[0]); + } + free(gtpmdev.tpmlist); + gtpmdev.tpmlist = NULL; + gtpmdev.num_alloc = 0; + + /* Wake up anyone possibly waiting on the device and let them exit */ + wake_up(&waitq); + schedule(); +} + +static void init_tpmcmd(tpmcmd_t* tpmcmd, domid_t domid, unsigned int handle, void *opaque) +{ + tpmcmd->domid = domid; + tpmcmd->locality = -1; + tpmcmd->handle = handle; + tpmcmd->opaque = opaque; + tpmcmd->req = NULL; + tpmcmd->req_len = 0; + tpmcmd->resp = NULL; + tpmcmd->resp_len = 0; +} + +tpmcmd_t* get_request(tpmif_t* tpmif) { + tpmcmd_t* cmd; + tpmif_shared_page_t *shr; + unsigned int offset; + int flags; +#ifdef TPMBACK_PRINT_DEBUG + int i; +#endif + + local_irq_save(flags); + + /* Allocate the cmd object to hold the data */ + if((cmd = malloc(sizeof(*cmd))) == NULL) { + goto error; + } + init_tpmcmd(cmd, tpmif->domid, tpmif->handle, tpmif->opaque); + + shr = tpmif->page; + cmd->req_len = shr->length; + cmd->locality = shr->locality; + offset = sizeof(*shr) + 4*shr->nr_extra_pages; + if (offset > PAGE_SIZE || offset + cmd->req_len > PAGE_SIZE) { + TPMBACK_ERR("%u/%u Command size too long for shared page!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + /* Allocate the buffer */ + if(cmd->req_len) { + if((cmd->req = malloc(cmd->req_len)) == NULL) { + goto error; + } + } + /* Copy the bits from the shared page(s) */ + memcpy(cmd->req, offset + (uint8_t*)shr, cmd->req_len); + +#ifdef TPMBACK_PRINT_DEBUG + TPMBACK_DEBUG("Received Tpm Command from %u/%u of size %u", (unsigned int) tpmif->domid, tpmif->handle, cmd->req_len); + for(i = 0; i < cmd->req_len; ++i) { + if (!(i % 30)) { + TPMBACK_DEBUG_MORE("\n"); + } + TPMBACK_DEBUG_MORE("%02hhX ", cmd->req[i]); + } + TPMBACK_DEBUG_MORE("\n\n"); +#endif + + local_irq_restore(flags); + return cmd; +error: + if(cmd != NULL) { + if (cmd->req != NULL) { + free(cmd->req); + cmd->req = NULL; + } + free(cmd); + cmd = NULL; + } + local_irq_restore(flags); + return NULL; + +} + +void send_response(tpmcmd_t* cmd, tpmif_t* tpmif) +{ + tpmif_shared_page_t *shr; + unsigned int offset; + int flags; +#ifdef TPMBACK_PRINT_DEBUG +int i; +#endif + + local_irq_save(flags); + + shr = tpmif->page; + shr->length = cmd->resp_len; + + offset = sizeof(*shr) + 4*shr->nr_extra_pages; + if (offset > PAGE_SIZE || offset + cmd->resp_len > PAGE_SIZE) { + TPMBACK_ERR("%u/%u Command size too long for shared page!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + memcpy(offset + 
(uint8_t*)shr, cmd->resp, cmd->resp_len); + +#ifdef TPMBACK_PRINT_DEBUG + TPMBACK_DEBUG("Sent response to %u/%u of size %u", (unsigned int) tpmif->domid, tpmif->handle, cmd->resp_len); + for(i = 0; i < cmd->resp_len; ++i) { + if (!(i % 30)) { + TPMBACK_DEBUG_MORE("\n"); + } + TPMBACK_DEBUG_MORE("%02hhX ", cmd->resp[i]); + } + TPMBACK_DEBUG_MORE("\n\n"); +#endif + /* clear the ready flag and send the event channel notice to the frontend */ + tpmif_req_finished(tpmif); + barrier(); + shr->state = TPMIF_STATE_FINISH; + wmb(); + notify_remote_via_evtchn(tpmif->evtchn); +error: + local_irq_restore(flags); + return; +} + +tpmcmd_t* tpmback_req_any(void) +{ + int i; + /* Block until something has a request */ + wait_event(waitq, (gtpmdev.flags & (TPMIF_REQ_READY | TPMIF_CLOSED))); + + /* Check if were shutting down */ + if(gtpmdev.flags & TPMIF_CLOSED) { + /* if something was waiting for us to give up the queue so it can shutdown, let it finish */ + schedule(); + return NULL; + } + + for(i = 0; i < gtpmdev.num_tpms; ++i) { + if(gtpmdev.tpmlist[i]->flags & TPMIF_REQ_READY) { + return get_request(gtpmdev.tpmlist[i]); + } + } + + TPMBACK_ERR("backend request ready flag was set but no interfaces were actually ready\n"); + return NULL; +} + +tpmcmd_t* tpmback_req(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + tpmif = get_tpmif(domid, handle); + if(tpmif == NULL) { + return NULL; + } + + wait_event(waitq, (tpmif->flags & (TPMIF_REQ_READY | TPMIF_CLOSED) || gtpmdev.flags & TPMIF_CLOSED)); + + /* Check if were shutting down */ + if(tpmif->flags & TPMIF_CLOSED || gtpmdev.flags & TPMIF_CLOSED) { + /* if something was waiting for us to give up the queue so it can free this instance, let it finish */ + schedule(); + return NULL; + } + + return get_request(tpmif); +} + +void tpmback_resp(tpmcmd_t* tpmcmd) +{ + tpmif_t* tpmif; + + /* Get the associated interface, if it doesnt exist then just quit */ + tpmif = get_tpmif(tpmcmd->domid, tpmcmd->handle); + if(tpmif == NULL) { + TPMBACK_ERR("Tried to send a reponse to non existant frontend %u/%u\n", (unsigned int) tpmcmd->domid, tpmcmd->handle); + goto end; + } + + if(!(tpmif->flags & TPMIF_REQ_READY)) { + TPMBACK_ERR("Tried to send response to a frontend that was not waiting for one %u/%u\n", (unsigned int) tpmcmd->domid, tpmcmd->handle); + goto end; + } + + /* Send response to frontend */ + send_response(tpmcmd, tpmif); + +end: + if(tpmcmd->req != NULL) { + free(tpmcmd->req); + } + free(tpmcmd); + return; +} + +int tpmback_wait_for_frontend_connect(domid_t *domid, unsigned int *handle) +{ + tpmif_t* tpmif; + int flags; + wait_event(waitq, ((gtpmdev.num_tpms > 0) || gtpmdev.flags & TPMIF_CLOSED)); + if(gtpmdev.flags & TPMIF_CLOSED) { + return -1; + } + local_irq_save(flags); + tpmif = gtpmdev.tpmlist[0]; + *domid = tpmif->domid; + *handle = tpmif->handle; + local_irq_restore(flags); + + return 0; +} + +int tpmback_num_frontends(void) +{ + return gtpmdev.num_tpms; +} diff -Nru xen-4.9.0/extras/mini-os/tpmfront.c xen-4.9.2/extras/mini-os/tpmfront.c --- xen-4.9.0/extras/mini-os/tpmfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/tpmfront.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,631 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. 
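Everything the backend exports above (tpmback_req_any, tpmback_req, tpmback_resp, tpmback_wait_for_frontend_connect, tpmback_num_frontends) amounts to a small blocking consumer API. A hedged sketch of the loop a Mini-OS vTPM application might run on top of it; handle_command() is a hypothetical emulator hook, and this compiles only inside a Mini-OS application, not as a standalone program.

/* Hypothetical hook: consume cmd->req/cmd->req_len and fill in
 * cmd->resp/cmd->resp_len before the reply goes out. */
extern void handle_command(tpmcmd_t *cmd);

static void vtpm_service_loop(void)
{
    tpmcmd_t *cmd;

    init_tpmback(NULL, NULL);   /* no open/close callbacks */

    /* tpmback_req_any() blocks until some frontend raises
     * TPMIF_REQ_READY; it returns NULL once shutdown_tpmback()
     * has set TPMIF_CLOSED. */
    while ((cmd = tpmback_req_any()) != NULL) {
        handle_command(cmd);
        tpmback_resp(cmd);      /* notifies the frontend, frees cmd */
    }
}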
+ * + * This code has been derived from drivers/char/tpm_vtpm.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2006 IBM Corporation + * + * This code has also been derived from drivers/char/tpm_xen.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netfront/netfront.c + * from the linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define TPMFRONT_PRINT_DEBUG +#ifdef TPMFRONT_PRINT_DEBUG +#define TPMFRONT_DEBUG(fmt,...) printk("Tpmfront:Debug("__FILE__":%d) " fmt, __LINE__, ##__VA_ARGS__) +#define TPMFRONT_DEBUG_MORE(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define TPMFRONT_DEBUG(fmt,...) +#endif +#define TPMFRONT_ERR(fmt,...) printk("Tpmfront:Error " fmt, ##__VA_ARGS__) +#define TPMFRONT_LOG(fmt,...) printk("Tpmfront:Info " fmt, ##__VA_ARGS__) + +#define min(a,b) (((a) < (b)) ? (a) : (b)) + +void tpmfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) { + struct tpmfront_dev* dev = (struct tpmfront_dev*) data; + tpmif_shared_page_t *shr = dev->page; + /*If we get a response when we didnt make a request, just ignore it */ + if(!dev->waiting) { + return; + } + + switch (shr->state) { + case TPMIF_STATE_FINISH: /* request was completed */ + case TPMIF_STATE_IDLE: /* request was cancelled */ + break; + default: + /* Spurious wakeup; do nothing, request is still pending */ + return; + } + + dev->waiting = 0; +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].read = 1; + } +#endif + wake_up(&dev->waitq); +} + +static int publish_xenbus(struct tpmfront_dev* dev) { + xenbus_transaction_t xbt; + int retry; + char* err; + /* Write the grant reference and event channel to xenstore */ +again: + if((err = xenbus_transaction_start(&xbt))) { + TPMFRONT_ERR("Unable to start xenbus transaction, error was %s\n", err); + free(err); + return -1; + } + + if((err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", (unsigned int) dev->ring_ref))) { + TPMFRONT_ERR("Unable to write %s/ring-ref, error was %s\n", dev->nodename, err); + free(err); + goto abort_transaction; + } + + if((err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", (unsigned int) dev->evtchn))) { + TPMFRONT_ERR("Unable to write %s/event-channel, error was %s\n", dev->nodename, err); + free(err); + goto abort_transaction; + } + + if((err = xenbus_transaction_end(xbt, 0, &retry))) { + TPMFRONT_ERR("Unable to complete xenbus transaction, error was %s\n", err); + free(err); + return -1; + } + if(retry) { + goto again; + } + + return 0; +abort_transaction: + if((err = xenbus_transaction_end(xbt, 1, &retry))) { + free(err); + } + return -1; +} + +static int wait_for_backend_connect(xenbus_event_queue* events, char* path) +{ + int state; + + TPMFRONT_LOG("Waiting for backend connection..\n"); + /* Wait for the backend to connect */ + while(1) { + state = xenbus_read_integer(path); + if ( state < 0) + state = XenbusStateUnknown; + switch(state) { + /* Bad states, we quit with error */ + case XenbusStateUnknown: + case XenbusStateClosing: + case XenbusStateClosed: + TPMFRONT_ERR("Unable to connect to backend\n"); + return -1; + /* If backend is connected then break out of loop */ + case 
XenbusStateConnected: + TPMFRONT_LOG("Backend Connected\n"); + return 0; + default: + xenbus_wait_for_watch(events); + } + } + +} + +static int wait_for_backend_closed(xenbus_event_queue* events, char* path) +{ + int state; + + TPMFRONT_LOG("Waiting for backend to close..\n"); + while(1) { + state = xenbus_read_integer(path); + if ( state < 0) + state = XenbusStateUnknown; + switch(state) { + case XenbusStateUnknown: + TPMFRONT_ERR("Backend Unknown state, forcing shutdown\n"); + return -1; + case XenbusStateClosed: + TPMFRONT_LOG("Backend Closed\n"); + return 0; + case XenbusStateInitWait: + TPMFRONT_LOG("Backend Closed (waiting for reconnect)\n"); + return 0; + default: + xenbus_wait_for_watch(events); + } + } + +} + +static int wait_for_backend_state_changed(struct tpmfront_dev* dev, XenbusState state) { + char* err; + int ret = 0; + xenbus_event_queue events = NULL; + char path[512]; + + snprintf(path, 512, "%s/state", dev->bepath); + /*Setup the watch to wait for the backend */ + if((err = xenbus_watch_path_token(XBT_NIL, path, path, &events))) { + TPMFRONT_ERR("Could not set a watch on %s, error was %s\n", path, err); + free(err); + return -1; + } + + /* Do the actual wait loop now */ + switch(state) { + case XenbusStateConnected: + ret = wait_for_backend_connect(&events, path); + break; + case XenbusStateClosed: + ret = wait_for_backend_closed(&events, path); + break; + default: + TPMFRONT_ERR("Bad wait state %d, ignoring\n", state); + } + + if((err = xenbus_unwatch_path_token(XBT_NIL, path, path))) { + TPMFRONT_ERR("Unable to unwatch %s, error was %s, ignoring..\n", path, err); + free(err); + } + return ret; +} + +static int tpmfront_connect(struct tpmfront_dev* dev) +{ + char* err; + /* Create shared page */ + dev->page = (tpmif_shared_page_t *)alloc_page(); + if(dev->page == NULL) { + TPMFRONT_ERR("Unable to allocate page for shared memory\n"); + goto error; + } + memset(dev->page, 0, PAGE_SIZE); + dev->ring_ref = gnttab_grant_access(dev->bedomid, virt_to_mfn(dev->page), 0); + TPMFRONT_DEBUG("grant ref is %lu\n", (unsigned long) dev->ring_ref); + + /*Create event channel */ + if(evtchn_alloc_unbound(dev->bedomid, tpmfront_handler, dev, &dev->evtchn)) { + TPMFRONT_ERR("Unable to allocate event channel\n"); + goto error_postmap; + } + unmask_evtchn(dev->evtchn); + TPMFRONT_DEBUG("event channel is %lu\n", (unsigned long) dev->evtchn); + + /* Write the entries to xenstore */ + if(publish_xenbus(dev)) { + goto error_postevtchn; + } + + /* Change state to connected */ + dev->state = XenbusStateConnected; + + /* Tell the backend that we are ready */ + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", dev->state))) { + TPMFRONT_ERR("Unable to write to xenstore %s/state, value=%u", dev->nodename, XenbusStateConnected); + free(err); + goto error; + } + + return 0; +error_postevtchn: + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); +error_postmap: + gnttab_end_access(dev->ring_ref); + free_page(dev->page); +error: + return -1; +} + +struct tpmfront_dev* init_tpmfront(const char* _nodename) +{ + struct tpmfront_dev* dev; + const char* nodename; + char path[512]; + char* value, *err; + unsigned long long ival; + + printk("============= Init TPM Front ================\n"); + + dev = malloc(sizeof(struct tpmfront_dev)); + memset(dev, 0, sizeof(struct tpmfront_dev)); + +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + nodename = _nodename ? 
_nodename : "device/vtpm/0"; + dev->nodename = strdup(nodename); + + init_waitqueue_head(&dev->waitq); + + /* Get backend domid */ + snprintf(path, 512, "%s/backend-id", dev->nodename); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + if(sscanf(value, "%llu", &ival) != 1) { + TPMFRONT_ERR("%s has non-integer value (%s)\n", path, value); + free(value); + goto error; + } + free(value); + dev->bedomid = ival; + + /* Get backend xenstore path */ + snprintf(path, 512, "%s/backend", dev->nodename); + if((err = xenbus_read(XBT_NIL, path, &dev->bepath))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + + /* Publish protocol v2 feature */ + snprintf(path, 512, "%s/feature-protocol-v2", dev->nodename); + if ((err = xenbus_write(XBT_NIL, path, "1"))) + { + TPMFRONT_ERR("Unable to write feature-protocol-v2 node: %s\n", err); + free(err); + goto error; + } + + /* Create and publish grant reference and event channel */ + if (tpmfront_connect(dev)) { + goto error; + } + + /* Wait for backend to connect */ + if( wait_for_backend_state_changed(dev, XenbusStateConnected)) { + goto error; + } + + /* Ensure backend is also using protocol v2 */ + snprintf(path, 512, "%s/feature-protocol-v2", dev->bepath); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + if(strcmp(value, "1")) { + TPMFRONT_ERR("%s has an invalid value (%s)\n", path, value); + free(value); + goto error; + } + free(value); + + TPMFRONT_LOG("Initialization Completed successfully\n"); + + return dev; + +error: + shutdown_tpmfront(dev); + return NULL; +} +void shutdown_tpmfront(struct tpmfront_dev* dev) +{ + char* err; + char path[512]; + if(dev == NULL) { + return; + } + TPMFRONT_LOG("Shutting down tpmfront\n"); + /* disconnect */ + if(dev->state == XenbusStateConnected) { + /* Tell backend we are closing */ + dev->state = XenbusStateClosing; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Clean up xenstore entries */ + snprintf(path, 512, "%s/event-channel", dev->nodename); + if((err = xenbus_rm(XBT_NIL, path))) { + free(err); + } + snprintf(path, 512, "%s/ring-ref", dev->nodename); + if((err = xenbus_rm(XBT_NIL, path))) { + free(err); + } + + /* Tell backend we are closed */ + dev->state = XenbusStateClosed; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Wait for the backend to close and unmap shared pages, ignore any errors */ + wait_for_backend_state_changed(dev, XenbusStateClosed); + + /* Prepare for a later reopen (possibly by a kexec'd kernel) */ + dev->state = XenbusStateInitialising; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Close event channel and unmap shared page */ + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); + gnttab_end_access(dev->ring_ref); + + free_page(dev->page); + } + + /* Cleanup memory usage */ + if(dev->respbuf) { + free(dev->respbuf); + } + 
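init_tpmfront() above plus tpmfront_cmd() further down are the whole client surface of this driver. A hedged round-trip sketch, Mini-OS only (it links against this file); the request buffer is a TPM 1.2 GetRandom command asking for 32 bytes, included purely for illustration.

static uint8_t getrandom_req[] = {
    0x00, 0xc1,             /* TPM_TAG_RQU_COMMAND */
    0x00, 0x00, 0x00, 0x0e, /* paramSize = 14 */
    0x00, 0x00, 0x00, 0x46, /* TPM_ORD_GetRandom */
    0x00, 0x00, 0x00, 0x20, /* bytesRequested = 32 */
};

static void vtpm_client_once(void)
{
    struct tpmfront_dev *dev;
    uint8_t *resp;
    size_t resplen;

    if ((dev = init_tpmfront(NULL)) == NULL)  /* NULL -> "device/vtpm/0" */
        return;
    if (tpmfront_cmd(dev, getrandom_req, sizeof(getrandom_req),
                     &resp, &resplen) == 0) {
        /* resp aliases dev->respbuf and is reused on the next command,
         * so copy the bytes out if they must outlive this call. */
    }
    shutdown_tpmfront(dev);
}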
if(dev->bepath) { + free(dev->bepath); + } + if(dev->nodename) { + free(dev->nodename); + } + free(dev); +} + +int tpmfront_send(struct tpmfront_dev* dev, const uint8_t* msg, size_t length) +{ + unsigned int offset; + tpmif_shared_page_t *shr = NULL; +#ifdef TPMFRONT_PRINT_DEBUG + int i; +#endif + /* Error Checking */ + if(dev == NULL || dev->state != XenbusStateConnected) { + TPMFRONT_ERR("Tried to send message through disconnected frontend\n"); + return -1; + } + shr = dev->page; + +#ifdef TPMFRONT_PRINT_DEBUG + TPMFRONT_DEBUG("Sending Msg to backend size=%u", (unsigned int) length); + for(i = 0; i < length; ++i) { + if(!(i % 30)) { + TPMFRONT_DEBUG_MORE("\n"); + } + TPMFRONT_DEBUG_MORE("%02X ", msg[i]); + } + TPMFRONT_DEBUG_MORE("\n"); +#endif + + /* Copy to shared pages now */ + offset = sizeof(*shr); + if (length + offset > PAGE_SIZE) { + TPMFRONT_ERR("Message too long for shared page\n"); + return -1; + } + memcpy(offset + (uint8_t*)shr, msg, length); + shr->length = length; + barrier(); + shr->state = TPMIF_STATE_SUBMIT; + + dev->waiting = 1; + dev->resplen = 0; +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].read = 0; + files[dev->fd].tpmfront.respgot = 0; + files[dev->fd].tpmfront.offset = 0; + } +#endif + wmb(); + notify_remote_via_evtchn(dev->evtchn); + return 0; +} +int tpmfront_recv(struct tpmfront_dev* dev, uint8_t** msg, size_t *length) +{ + unsigned int offset; + tpmif_shared_page_t *shr = NULL; +#ifdef TPMFRONT_PRINT_DEBUG +int i; +#endif + if(dev == NULL || dev->state != XenbusStateConnected) { + TPMFRONT_ERR("Tried to receive message from disconnected frontend\n"); + return -1; + } + /*Wait for the response */ + wait_event(dev->waitq, (!dev->waiting)); + shr = dev->page; + + /* Initialize */ + *msg = NULL; + *length = 0; + offset = sizeof(*shr); + + if (shr->state != TPMIF_STATE_FINISH) + goto quit; + + *length = shr->length; + + if (*length + offset > PAGE_SIZE) { + TPMFRONT_ERR("Reply too long for shared page\n"); + return -1; + } + + /* Alloc the buffer */ + if(dev->respbuf) { + free(dev->respbuf); + } + *msg = dev->respbuf = malloc(*length); + dev->resplen = *length; + + /* Copy the bits */ + memcpy(*msg, offset + (uint8_t*)shr, *length); + +#ifdef TPMFRONT_PRINT_DEBUG + TPMFRONT_DEBUG("Received response from backend size=%u", (unsigned int) *length); + for(i = 0; i < *length; ++i) { + if(!(i % 30)) { + TPMFRONT_DEBUG_MORE("\n"); + } + TPMFRONT_DEBUG_MORE("%02X ", (*msg)[i]); + } + TPMFRONT_DEBUG_MORE("\n"); +#endif +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].tpmfront.respgot = 1; + } +#endif +quit: + return 0; +} + +int tpmfront_cmd(struct tpmfront_dev* dev, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen) +{ + int rc; + if((rc = tpmfront_send(dev, req, reqlen))) { + return rc; + } + if((rc = tpmfront_recv(dev, resp, resplen))) { + return rc; + } + + return 0; +} + +int tpmfront_set_locality(struct tpmfront_dev* dev, int locality) +{ + if (!dev || !dev->page) + return -1; + dev->page->locality = locality; + return 0; +} + +#ifdef HAVE_LIBC +#include +int tpmfront_open(struct tpmfront_dev* dev) +{ + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } + + dev->fd = alloc_fd(FTYPE_TPMFRONT); + printk("tpmfront_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].tpmfront.dev = dev; + files[dev->fd].tpmfront.offset = 0; + files[dev->fd].tpmfront.respgot = 0; + return dev->fd; +} + +int tpmfront_posix_write(int fd, const uint8_t* buf, size_t count) +{ + int rc; + struct tpmfront_dev* dev; + dev = 
files[fd].tpmfront.dev; + + if(count == 0) { + return 0; + } + + /* Return an error if we are already processing a command */ + if(dev->waiting) { + errno = EINPROGRESS; + return -1; + } + /* Send the command now */ + if((rc = tpmfront_send(dev, buf, count)) != 0) { + errno = EIO; + return -1; + } + return count; +} + +int tpmfront_posix_read(int fd, uint8_t* buf, size_t count) +{ + int rc; + uint8_t* dummybuf; + size_t dummysz; + struct tpmfront_dev* dev; + + dev = files[fd].tpmfront.dev; + + if(count == 0) { + return 0; + } + + /* get the response if we haven't already */ + if(files[dev->fd].tpmfront.respgot == 0) { + if ((rc = tpmfront_recv(dev, &dummybuf, &dummysz)) != 0) { + errno = EIO; + return -1; + } + } + + /* handle EOF case */ + if(files[dev->fd].tpmfront.offset >= dev->resplen) { + return 0; + } + + /* Compute the number of bytes and do the copy operation */ + if((rc = min(count, dev->resplen - files[dev->fd].tpmfront.offset)) != 0) { + memcpy(buf, dev->respbuf + files[dev->fd].tpmfront.offset, rc); + files[dev->fd].tpmfront.offset += rc; + } + + return rc; +} + +int tpmfront_posix_fstat(int fd, struct stat* buf) +{ + uint8_t* dummybuf; + size_t dummysz; + int rc; + struct tpmfront_dev* dev = files[fd].tpmfront.dev; + + /* If we have a response waiting, then read it now from the backend + * so we can get its length*/ + if(dev->waiting || (files[dev->fd].read == 1 && !files[dev->fd].tpmfront.respgot)) { + if ((rc = tpmfront_recv(dev, &dummybuf, &dummysz)) != 0) { + errno = EIO; + return -1; + } + } + + buf->st_mode = O_RDWR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->resplen; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} + + +#endif diff -Nru xen-4.9.0/extras/mini-os/tpm_tis.c xen-4.9.2/extras/mini-os/tpm_tis.c --- xen-4.9.0/extras/mini-os/tpm_tis.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/tpm_tis.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,1523 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/char/tpm.c + * from the linux kernel + * + * Copyright (C) 2004 IBM Corporation + * + * This code has also been derived from drivers/char/tpm/tpm_tis.c + * from the linux kernel + * + * Copyright (C) 2005, 2006 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef min + #define min( a, b ) ( ((a) < (b)) ? 
(a) : (b) ) +#endif +#define ADJUST_TIMEOUTS_TO_STANDARD(initial,standard,timeout_no) \ + if((initial) < (standard)){ \ + (initial) = (standard); \ + printk("Timeout %c was adjusted to standard value.\n",timeout_no); \ + } + +#define TPM_HEADER_SIZE 10 + +#define TPM_BUFSIZE 2048 + +struct tpm_input_header { + uint16_t tag; + uint32_t length; + uint32_t ordinal; +}__attribute__((packed)); + +struct tpm_output_header { + uint16_t tag; + uint32_t length; + uint32_t return_code; +}__attribute__((packed)); + +struct stclear_flags_t { + uint16_t tag; + uint8_t deactivated; + uint8_t disableForceClear; + uint8_t physicalPresence; + uint8_t physicalPresenceLock; + uint8_t bGlobalLock; +}__attribute__((packed)); + +struct tpm_version_t { + uint8_t Major; + uint8_t Minor; + uint8_t revMajor; + uint8_t revMinor; +}__attribute__((packed)); + +struct tpm_version_1_2_t { + uint16_t tag; + uint8_t Major; + uint8_t Minor; + uint8_t revMajor; + uint8_t revMinor; +}__attribute__((packed)); + +struct timeout_t { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +}__attribute__((packed)); + +struct duration_t { + uint32_t tpm_short; + uint32_t tpm_medium; + uint32_t tpm_long; +}__attribute__((packed)); + +struct permanent_flags_t { + uint16_t tag; + uint8_t disable; + uint8_t ownership; + uint8_t deactivated; + uint8_t readPubek; + uint8_t disableOwnerClear; + uint8_t allowMaintenance; + uint8_t physicalPresenceLifetimeLock; + uint8_t physicalPresenceHWEnable; + uint8_t physicalPresenceCMDEnable; + uint8_t CEKPUsed; + uint8_t TPMpost; + uint8_t TPMpostLock; + uint8_t FIPS; + uint8_t operator; + uint8_t enableRevokeEK; + uint8_t nvLocked; + uint8_t readSRKPub; + uint8_t tpmEstablished; + uint8_t maintenanceDone; + uint8_t disableFullDALogicInfo; +}__attribute__((packed)); + +typedef union { + struct permanent_flags_t perm_flags; + struct stclear_flags_t stclear_flags; + bool owned; + uint32_t num_pcrs; + struct tpm_version_t tpm_version; + struct tpm_version_1_2_t tpm_version_1_2; + uint32_t manufacturer_id; + struct timeout_t timeout; + struct duration_t duration; +} cap_t; + +struct tpm_getcap_params_in { + uint32_t cap; + uint32_t subcap_size; + uint32_t subcap; +}__attribute__((packed)); + +struct tpm_getcap_params_out { + uint32_t cap_size; + cap_t cap; +}__attribute__((packed)); + +struct tpm_readpubek_params_out { + uint8_t algorithm[4]; + uint8_t encscheme[2]; + uint8_t sigscheme[2]; + uint32_t paramsize; + uint8_t parameters[12]; /*assuming RSA*/ + uint32_t keysize; + uint8_t modulus[256]; + uint8_t checksum[20]; +}__attribute__((packed)); + +typedef union { + struct tpm_input_header in; + struct tpm_output_header out; +} tpm_cmd_header; + +#define TPM_DIGEST_SIZE 20 +struct tpm_pcrread_out { + uint8_t pcr_result[TPM_DIGEST_SIZE]; +}__attribute__((packed)); + +struct tpm_pcrread_in { + uint32_t pcr_idx; +}__attribute__((packed)); + +struct tpm_pcrextend_in { + uint32_t pcr_idx; + uint8_t hash[TPM_DIGEST_SIZE]; +}__attribute__((packed)); + +typedef union { + struct tpm_getcap_params_out getcap_out; + struct tpm_readpubek_params_out readpubek_out; + uint8_t readpubek_out_buffer[sizeof(struct tpm_readpubek_params_out)]; + struct tpm_getcap_params_in getcap_in; + struct tpm_pcrread_in pcrread_in; + struct tpm_pcrread_out pcrread_out; + struct tpm_pcrextend_in pcrextend_in; +} tpm_cmd_params; + +struct tpm_cmd_t { + tpm_cmd_header header; + tpm_cmd_params params; +}__attribute__((packed)); + + +enum tpm_duration { + TPM_SHORT = 0, + TPM_MEDIUM = 1, + TPM_LONG = 2, + TPM_UNDEFINED, +}; + +#define 
TPM_MAX_ORDINAL 243 +#define TPM_MAX_PROTECTED_ORDINAL 12 +#define TPM_PROTECTED_ORDINAL_MASK 0xFF + +extern const uint8_t tpm_protected_ordinal_duration[TPM_MAX_PROTECTED_ORDINAL]; +extern const uint8_t tpm_ordinal_duration[TPM_MAX_ORDINAL]; + +#define TPM_DIGEST_SIZE 20 +#define TPM_ERROR_SIZE 10 +#define TPM_RET_CODE_IDX 6 + +/* tpm_capabilities */ +#define TPM_CAP_FLAG cpu_to_be32(4) +#define TPM_CAP_PROP cpu_to_be32(5) +#define CAP_VERSION_1_1 cpu_to_be32(0x06) +#define CAP_VERSION_1_2 cpu_to_be32(0x1A) + +/* tpm_sub_capabilities */ +#define TPM_CAP_PROP_PCR cpu_to_be32(0x101) +#define TPM_CAP_PROP_MANUFACTURER cpu_to_be32(0x103) +#define TPM_CAP_FLAG_PERM cpu_to_be32(0x108) +#define TPM_CAP_FLAG_VOL cpu_to_be32(0x109) +#define TPM_CAP_PROP_OWNER cpu_to_be32(0x111) +#define TPM_CAP_PROP_TIS_TIMEOUT cpu_to_be32(0x115) +#define TPM_CAP_PROP_TIS_DURATION cpu_to_be32(0x120) + + +#define TPM_INTERNAL_RESULT_SIZE 200 +#define TPM_TAG_RQU_COMMAND cpu_to_be16(193) +#define TPM_ORD_GET_CAP cpu_to_be32(101) + +extern const struct tpm_input_header tpm_getcap_header; + + + +const uint8_t tpm_protected_ordinal_duration[TPM_MAX_PROTECTED_ORDINAL] = { + TPM_UNDEFINED, /* 0 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 5 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 10 */ + TPM_SHORT, +}; + +const uint8_t tpm_ordinal_duration[TPM_MAX_ORDINAL] = { + TPM_UNDEFINED, /* 0 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 5 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 10 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_LONG, + TPM_LONG, + TPM_MEDIUM, /* 15 */ + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, + TPM_LONG, + TPM_SHORT, /* 20 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, /* 25 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 30 */ + TPM_LONG, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 35 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 40 */ + TPM_LONG, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 45 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_LONG, + TPM_MEDIUM, /* 50 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 55 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 60 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 65 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 70 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 75 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 80 */ + TPM_UNDEFINED, + TPM_MEDIUM, + TPM_LONG, + TPM_SHORT, + TPM_UNDEFINED, /* 85 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 90 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 95 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 100 */ + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 105 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 110 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 115 */ + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 120 */ + TPM_LONG, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 125 */ + 
TPM_SHORT, + TPM_LONG, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 130 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_SHORT, + TPM_MEDIUM, + TPM_UNDEFINED, /* 135 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 140 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 145 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 150 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 155 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 160 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 165 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 170 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 175 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 180 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, /* 185 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 190 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 195 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 200 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 205 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 210 */ + TPM_UNDEFINED, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_UNDEFINED, /* 215 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 220 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 225 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 230 */ + TPM_LONG, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 235 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 240 */ + TPM_UNDEFINED, + TPM_MEDIUM, +}; + +const struct tpm_input_header tpm_getcap_header = { + .tag = TPM_TAG_RQU_COMMAND, + .length = cpu_to_be32(22), + .ordinal = TPM_ORD_GET_CAP +}; + + +enum tis_access { + TPM_ACCESS_VALID = 0x80, + TPM_ACCESS_ACTIVE_LOCALITY = 0x20, /* (R) */ + TPM_ACCESS_RELINQUISH_LOCALITY = 0x20,/* (W) */ + TPM_ACCESS_REQUEST_PENDING = 0x04, /* (W) */ + TPM_ACCESS_REQUEST_USE = 0x02, /* (W) */ +}; + +enum tis_status { + TPM_STS_VALID = 0x80, /* (R) */ + TPM_STS_COMMAND_READY = 0x40, /* (R) */ + TPM_STS_DATA_AVAIL = 0x10, /* (R) */ + TPM_STS_DATA_EXPECT = 0x08, /* (R) */ + TPM_STS_GO = 0x20, /* (W) */ +}; + +enum tis_int_flags { + TPM_GLOBAL_INT_ENABLE = 0x80000000, + TPM_INTF_BURST_COUNT_STATIC = 0x100, + TPM_INTF_CMD_READY_INT = 0x080, + TPM_INTF_INT_EDGE_FALLING = 0x040, + TPM_INTF_INT_EDGE_RISING = 0x020, + TPM_INTF_INT_LEVEL_LOW = 0x010, + TPM_INTF_INT_LEVEL_HIGH = 0x008, + TPM_INTF_LOCALITY_CHANGE_INT = 0x004, + TPM_INTF_STS_VALID_INT = 0x002, + TPM_INTF_DATA_AVAIL_INT = 0x001, +}; + +enum tis_defaults { + TIS_MEM_BASE = 0xFED40000, + TIS_MEM_LEN = 0x5000, + TIS_SHORT_TIMEOUT = 750, /*ms*/ + TIS_LONG_TIMEOUT = 2000, /*2 sec */ +}; + +#define TPM_TIMEOUT 5 + +#define TPM_ACCESS(t, l) (((uint8_t*)t->pages[l]) + 0x0000) +#define TPM_INT_ENABLE(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0008)) +#define TPM_INT_VECTOR(t, l) (((uint8_t*)t->pages[l]) + 0x000C) +#define TPM_INT_STATUS(t, l) (((uint8_t*)t->pages[l]) + 0x0010) +#define TPM_INTF_CAPS(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0014)) 
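The access macros above and the TPM_STS/TPM_DATA_FIFO ones that follow all encode the same TIS rule: each locality owns one 4 KiB register window, so register R of locality l lives at baseaddr + l * 0x1000 + offset(R), which is why the driver maps pages[l] one page apart. A standalone sketch of that arithmetic, runnable anywhere:

#include <stdio.h>

#define TIS_LOCALITY_STRIDE 0x1000UL   /* one page per locality */

static unsigned long tis_reg(unsigned long base, int locality,
                             unsigned long offset)
{
    return base + (unsigned long)locality * TIS_LOCALITY_STRIDE + offset;
}

int main(void)
{
    unsigned long base = 0xFED40000UL;  /* TIS_MEM_BASE default */

    printf("ACCESS,     locality 0: %#lx\n", tis_reg(base, 0, 0x0000));
    printf("INT_STATUS, locality 0: %#lx\n", tis_reg(base, 0, 0x0010));
    printf("ACCESS,     locality 2: %#lx\n", tis_reg(base, 2, 0x0000));
    return 0;
}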
+#define TPM_STS(t, l) ((uint8_t*)(((uint8_t*)t->pages[l]) + 0x0018)) +#define TPM_DATA_FIFO(t, l) (((uint8_t*)t->pages[l]) + 0x0024) + +#define TPM_DID_VID(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0F00)) +#define TPM_RID(t, l) (((uint8_t*)t->pages[l]) + 0x0F04) + +struct tpm_chip { + int enabled_localities; + int locality; + unsigned long baseaddr; + uint8_t* pages[5]; + int did, vid, rid; + + uint8_t data_buffer[TPM_BUFSIZE]; + int data_len; + + s_time_t timeout_a, timeout_b, timeout_c, timeout_d; + s_time_t duration[3]; + +#ifdef HAVE_LIBC + int fd; +#endif + + unsigned int irq; + struct wait_queue_head read_queue; + struct wait_queue_head int_queue; +}; + + +static void __init_tpm_chip(struct tpm_chip* tpm) { + tpm->enabled_localities = TPM_TIS_EN_LOCLALL; + tpm->locality = -1; + tpm->baseaddr = 0; + tpm->pages[0] = tpm->pages[1] = tpm->pages[2] = tpm->pages[3] = tpm->pages[4] = NULL; + tpm->vid = 0; + tpm->did = 0; + tpm->irq = 0; + init_waitqueue_head(&tpm->read_queue); + init_waitqueue_head(&tpm->int_queue); + + tpm->data_len = -1; + +#ifdef HAVE_LIBC + tpm->fd = -1; +#endif +} + +/* + * Returns max number of nsecs to wait + */ +s_time_t tpm_calc_ordinal_duration(struct tpm_chip *chip, + uint32_t ordinal) +{ + int duration_idx = TPM_UNDEFINED; + s_time_t duration = 0; + + if (ordinal < TPM_MAX_ORDINAL) + duration_idx = tpm_ordinal_duration[ordinal]; + else if ((ordinal & TPM_PROTECTED_ORDINAL_MASK) < + TPM_MAX_PROTECTED_ORDINAL) + duration_idx = + tpm_protected_ordinal_duration[ordinal & + TPM_PROTECTED_ORDINAL_MASK]; + + if (duration_idx != TPM_UNDEFINED) { + duration = chip->duration[duration_idx]; + } + + if (duration <= 0) { + return SECONDS(120); + } + else + { + return duration; + } +} + + +static int locality_enabled(struct tpm_chip* tpm, int l) { + return l >= 0 && tpm->enabled_localities & (1 << l); +} + +static int check_locality(struct tpm_chip* tpm, int l) { + if(locality_enabled(tpm, l) && (ioread8(TPM_ACCESS(tpm, l)) & + (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) == + (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) { + return l; + } + return -1; +} + +void release_locality(struct tpm_chip* tpm, int l, int force) +{ + if (locality_enabled(tpm, l) && (force || (ioread8(TPM_ACCESS(tpm, l)) & + (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID)) == + (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID))) { + iowrite8(TPM_ACCESS(tpm, l), TPM_ACCESS_RELINQUISH_LOCALITY); + } +} + +int tpm_tis_request_locality(struct tpm_chip* tpm, int l) { + + s_time_t stop; + /*Make sure locality is valid */ + if(!locality_enabled(tpm, l)) { + printk("tpm_tis_change_locality() Tried to change to locality %d, but it is disabled or invalid!\n", l); + return -1; + } + /* Check if we already have the current locality */ + if(check_locality(tpm, l) >= 0) { + return tpm->locality = l; + } + /* Set the new locality*/ + iowrite8(TPM_ACCESS(tpm, l), TPM_ACCESS_REQUEST_USE); + + if(tpm->irq) { + /* Wait for interrupt */ + wait_event_deadline(tpm->int_queue, (check_locality(tpm, l) >= 0), NOW() + tpm->timeout_a); + + /* FIXME: Handle timeout event, should return error in that case */ + return l; + } else { + /* Wait for burstcount */ + stop = NOW() + tpm->timeout_a; + do { + if(check_locality(tpm, l) >= 0) { + return tpm->locality = l; + } + msleep(TPM_TIMEOUT); + } while(NOW() < stop); + } + + printk("REQ LOCALITY FAILURE\n"); + return -1; +} + +static uint8_t tpm_tis_status(struct tpm_chip* tpm) { + return ioread8(TPM_STS(tpm, tpm->locality)); +} + +/* This causes the current command to be 
aborted */ +static void tpm_tis_ready(struct tpm_chip* tpm) { + iowrite8(TPM_STS(tpm, tpm->locality), TPM_STS_COMMAND_READY); +} +#define tpm_tis_cancel_cmd(v) tpm_tis_ready(v) + +static int get_burstcount(struct tpm_chip* tpm) { + s_time_t stop; + int burstcnt; + + stop = NOW() + tpm->timeout_d; + do { + burstcnt = ioread8((TPM_STS(tpm, tpm->locality) + 1)); + burstcnt += ioread8(TPM_STS(tpm, tpm->locality) + 2) << 8; + + if (burstcnt) { + return burstcnt; + } + msleep(TPM_TIMEOUT); + } while(NOW() < stop); + return -EBUSY; +} + +static int wait_for_stat(struct tpm_chip* tpm, uint8_t mask, + unsigned long timeout, struct wait_queue_head* queue) { + s_time_t stop; + uint8_t status; + + status = tpm_tis_status(tpm); + if((status & mask) == mask) { + return 0; + } + + if(tpm->irq) { + wait_event_deadline(*queue, ((tpm_tis_status(tpm) & mask) == mask), timeout); + /* FIXME: Check for timeout and return -ETIME */ + return 0; + } else { + stop = NOW() + timeout; + do { + msleep(TPM_TIMEOUT); + status = tpm_tis_status(tpm); + if((status & mask) == mask) + return 0; + } while( NOW() < stop); + } + return -ETIME; +} + +static int recv_data(struct tpm_chip* tpm, uint8_t* buf, size_t count) { + int size = 0; + int burstcnt; + while( size < count && + wait_for_stat(tpm, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + tpm->timeout_c, + &tpm->read_queue) + == 0) { + burstcnt = get_burstcount(tpm); + for(; burstcnt > 0 && size < count; --burstcnt) + { + buf[size++] = ioread8(TPM_DATA_FIFO(tpm, tpm->locality)); + } + } + return size; +} + +int tpm_tis_recv(struct tpm_chip* tpm, uint8_t* buf, size_t count) { + int size = 0; + int expected, status; + + if (count < TPM_HEADER_SIZE) { + size = -EIO; + goto out; + } + + /* read first 10 bytes, including tag, paramsize, and result */ + if((size = + recv_data(tpm, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { + printk("Error reading tpm cmd header\n"); + goto out; + } + + expected = be32_to_cpu(*((uint32_t*)(buf + 2))); + if(expected > count) { + size = -EIO; + goto out; + } + + if((size += recv_data(tpm, & buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE)) < expected) { + printk("Unable to read rest of tpm command size=%d expected=%d\n", size, expected); + size = -ETIME; + goto out; + } + + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->int_queue); + status = tpm_tis_status(tpm); + if(status & TPM_STS_DATA_AVAIL) { + printk("Error: left over data\n"); + size = -EIO; + goto out; + } + +out: + tpm_tis_ready(tpm); + release_locality(tpm, tpm->locality, 0); + return size; +} +int tpm_tis_send(struct tpm_chip* tpm, uint8_t* buf, size_t len) { + int rc; + int status, burstcnt = 0; + int count = 0; + uint32_t ordinal; + + if(tpm_tis_request_locality(tpm, tpm->locality) < 0) { + return -EBUSY; + } + + status = tpm_tis_status(tpm); + if((status & TPM_STS_COMMAND_READY) == 0) { + tpm_tis_ready(tpm); + if(wait_for_stat(tpm, TPM_STS_COMMAND_READY, tpm->timeout_b, &tpm->int_queue) < 0) { + rc = -ETIME; + goto out_err; + } + } + + while(count < len - 1) { + burstcnt = get_burstcount(tpm); + for(;burstcnt > 0 && count < len -1; --burstcnt) { + iowrite8(TPM_DATA_FIFO(tpm, tpm->locality), buf[count++]); + } + + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->int_queue); + status = tpm_tis_status(tpm); + if((status & TPM_STS_DATA_EXPECT) == 0) { + rc = -EIO; + goto out_err; + } + } + + /*Write last byte*/ + iowrite8(TPM_DATA_FIFO(tpm, tpm->locality), buf[count]); + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->read_queue); + status = tpm_tis_status(tpm); + 
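wait_for_stat() above is the heart of polling mode: re-read the STS register until every requested bit is set or the deadline passes, sleeping TPM_TIMEOUT (5 ms) between reads, with the interrupt path replacing the loop by wait_event_deadline(). A minimal standalone rendering of the polling pattern, with read_status() as a hypothetical stub:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint8_t read_status(void) { return 0x90; } /* stub: VALID|DATA_AVAIL */

static bool wait_for_mask(uint8_t mask, int timeout_ms)
{
    struct timespec tick = { 0, 5 * 1000 * 1000 }; /* TPM_TIMEOUT = 5 ms */
    int waited = 0;

    for (;;) {
        if ((read_status() & mask) == mask)
            return true;            /* bits arrived in time */
        if (waited >= timeout_ms)
            return false;           /* caller maps this to -ETIME */
        nanosleep(&tick, NULL);     /* stand-in for Mini-OS msleep() */
        waited += 5;
    }
}

int main(void)
{
    printf("%d\n", wait_for_mask(0x90, 750)); /* 1: stub has both bits set */
    return 0;
}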
if((status & TPM_STS_DATA_EXPECT) != 0) { + rc = -EIO; + goto out_err; + } + + /*go and do it*/ + iowrite8(TPM_STS(tpm, tpm->locality), TPM_STS_GO); + + if(tpm->irq) { + /*Wait for interrupt */ + ordinal = be32_to_cpu(*(buf + 6)); + if(wait_for_stat(tpm, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + tpm_calc_ordinal_duration(tpm, ordinal), + &tpm->read_queue) < 0) { + rc = -ETIME; + goto out_err; + } + } +#ifdef HAVE_LIBC + if(tpm->fd >= 0) { + files[tpm->fd].read = 0; + files[tpm->fd].tpm_tis.respgot = 0; + files[tpm->fd].tpm_tis.offset = 0; + } +#endif + return len; + +out_err: + tpm_tis_ready(tpm); + release_locality(tpm, tpm->locality, 0); + return rc; +} + +static void tpm_tis_irq_handler(evtchn_port_t port, struct pt_regs *regs, void* data) +{ + struct tpm_chip* tpm = data; + uint32_t interrupt; + int i; + + interrupt = ioread32(TPM_INT_STATUS(tpm, tpm->locality)); + if(interrupt == 0) { + return; + } + + if(interrupt & TPM_INTF_DATA_AVAIL_INT) { + wake_up(&tpm->read_queue); + } + if(interrupt & TPM_INTF_LOCALITY_CHANGE_INT) { + for(i = 0; i < 5; ++i) { + if(check_locality(tpm, i) >= 0) { + break; + } + } + } + if(interrupt & (TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_STS_VALID_INT | + TPM_INTF_CMD_READY_INT)) { + wake_up(&tpm->int_queue); + } + + /* Clear interrupts handled with TPM_EOI */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), interrupt); + ioread32(TPM_INT_STATUS(tpm, tpm->locality)); + return; +} + +/* + * Internal kernel interface to transmit TPM commands + */ +static ssize_t tpm_transmit(struct tpm_chip *chip, const uint8_t *buf, + size_t bufsiz) +{ + ssize_t rc; + uint32_t count, ordinal; + s_time_t stop; + + count = be32_to_cpu(*((uint32_t *) (buf + 2))); + ordinal = be32_to_cpu(*((uint32_t *) (buf + 6))); + if (count == 0) + return -ENODATA; + if (count > bufsiz) { + printk("Error: invalid count value %x %zx \n", count, bufsiz); + return -E2BIG; + } + + //down(&chip->tpm_mutex); + + if ((rc = tpm_tis_send(chip, (uint8_t *) buf, count)) < 0) { + printk("tpm_transmit: tpm_send: error %ld\n", (long) rc); + goto out; + } + + if (chip->irq) + goto out_recv; + + stop = NOW() + tpm_calc_ordinal_duration(chip, ordinal); + do { + uint8_t status = tpm_tis_status(chip); + if ((status & (TPM_STS_DATA_AVAIL | TPM_STS_VALID)) == + (TPM_STS_DATA_AVAIL | TPM_STS_VALID)) + goto out_recv; + + if ((status == TPM_STS_COMMAND_READY)) { + printk("TPM Error: Operation Canceled\n"); + rc = -ECANCELED; + goto out; + } + + msleep(TPM_TIMEOUT); /* CHECK */ + rmb(); + } while (NOW() < stop); + + /* Cancel the command */ + tpm_tis_cancel_cmd(chip); + printk("TPM Operation Timed out\n"); + rc = -ETIME; + goto out; + +out_recv: + if((rc = tpm_tis_recv(chip, (uint8_t *) buf, bufsiz)) < 0) { + printk("tpm_transmit: tpm_recv: error %d\n", (int) rc); + } +out: + //up(&chip->tpm_mutex); + return rc; +} + +static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, + int len, const char *desc) +{ + int err; + + len = tpm_transmit(chip,(uint8_t *) cmd, len); + if (len < 0) + return len; + if (len == TPM_ERROR_SIZE) { + err = be32_to_cpu(cmd->header.out.return_code); + printk("A TPM error (%d) occurred %s\n", err, desc); + return err; + } + return 0; +} + +int tpm_get_timeouts(struct tpm_chip *chip) +{ + struct tpm_cmd_t tpm_cmd; + struct timeout_t *timeout_cap; + struct duration_t *duration_cap; + ssize_t rc; + uint32_t timeout; + unsigned int scale = 1; + + tpm_cmd.header.in = tpm_getcap_header; + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = 
cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT; + + if((rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the timeouts")) != 0) { + printk("transmit failed %d\n", (int) rc); + goto duration; + } + + if(be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) != + sizeof(tpm_cmd.header.out) + sizeof(uint32_t) + 4 * sizeof(uint32_t)) { + return -EINVAL; + } + + timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; + /* Don't overwrite default if value is 0 */ + timeout = be32_to_cpu(timeout_cap->a); + if(timeout && timeout < 1000) { + /* timeouts reported in msec rather than usec */ + scale = 1000; + } + if (timeout) + chip->timeout_a = MICROSECS(timeout * scale); /* value is in usec after scaling */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_a,MILLISECS(TIS_SHORT_TIMEOUT),'a'); + + timeout = be32_to_cpu(timeout_cap->b); + if (timeout) + chip->timeout_b = MICROSECS(timeout * scale); /* value is in usec after scaling */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_b,MILLISECS(TIS_LONG_TIMEOUT),'b'); + + timeout = be32_to_cpu(timeout_cap->c); + if (timeout) + chip->timeout_c = MICROSECS(timeout * scale); /* value is in usec after scaling */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_c,MILLISECS(TIS_SHORT_TIMEOUT),'c'); + + timeout = be32_to_cpu(timeout_cap->d); + if (timeout) + chip->timeout_d = MICROSECS(timeout * scale); /* value is in usec after scaling */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_d,MILLISECS(TIS_SHORT_TIMEOUT),'d'); + +duration: + tpm_cmd.header.in = tpm_getcap_header; + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION; + + if((rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the durations")) < 0) { + return rc; + } + + if(be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) != + sizeof(tpm_cmd.header.out) + sizeof(uint32_t) + 3 * sizeof(uint32_t)) { + return -EINVAL; + } + + duration_cap = &tpm_cmd.params.getcap_out.cap.duration; + chip->duration[TPM_SHORT] = MICROSECS(be32_to_cpu(duration_cap->tpm_short)); + chip->duration[TPM_MEDIUM] = MICROSECS(be32_to_cpu(duration_cap->tpm_medium)); + chip->duration[TPM_LONG] = MICROSECS(be32_to_cpu(duration_cap->tpm_long));
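The TIS timeout capability is specified in microseconds, but some chips report milliseconds; the code above therefore derives a single scale factor from a nonzero timeout A below 1000 and applies it to all four values, after which ADJUST_TIMEOUTS_TO_STANDARD floors anything still below the TIS defaults. A standalone sketch of the unit rule (values illustrative, not from a real chip):

#include <stdio.h>

static unsigned long to_usec(unsigned long reported, unsigned long scale)
{
    return reported * scale;   /* zero keeps the driver default instead */
}

int main(void)
{
    unsigned long a = 750;     /* < 1000, so treated as msec */
    unsigned long scale = (a && a < 1000) ? 1000 : 1;

    printf("timeout a: %lu usec\n", to_usec(a, scale));     /* 750000 */
    printf("timeout b: %lu usec\n", to_usec(2000, scale));  /* same scale */
    return 0;
}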
+ + /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above + * value wrong and apparently reports msecs rather than usecs. So we + * fix up the resulting too-small TPM_SHORT value to make things work. + */ + if (chip->duration[TPM_SHORT] < MILLISECS(10)) { + chip->duration[TPM_SHORT] = SECONDS(1); + chip->duration[TPM_MEDIUM] *= 1000; + chip->duration[TPM_LONG] *= 1000; + printk("Adjusting TPM timeout parameters\n"); + } + + return 0; +} + + + +void tpm_continue_selftest(struct tpm_chip* chip) { + uint8_t data[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* length */ + 0, 0, 0, 83, /* TPM_ORD_ContinueSelfTest */ + }; + + tpm_transmit(chip, data, sizeof(data)); +} + +ssize_t tpm_getcap(struct tpm_chip *chip, uint32_t subcap_id, cap_t *cap, + const char *desc) +{ + struct tpm_cmd_t tpm_cmd; + int rc; + + tpm_cmd.header.in = tpm_getcap_header; + if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) { + tpm_cmd.params.getcap_in.cap = subcap_id; + /*subcap field not necessary */ + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0); + tpm_cmd.header.in.length -= cpu_to_be32(sizeof(uint32_t)); + } else { + if (subcap_id == TPM_CAP_FLAG_PERM || + subcap_id == TPM_CAP_FLAG_VOL) + tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG; + else + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = subcap_id; + } + rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc); + if (!rc) + *cap = tpm_cmd.params.getcap_out.cap; + return rc; +} + + +struct tpm_chip* init_tpm_tis(unsigned long baseaddr, int localities, unsigned int irq) +{ + int i; + unsigned long addr; + struct tpm_chip* tpm = NULL; + uint32_t didvid; + uint32_t intfcaps; + uint32_t intmask; + + printk("============= Init TPM TIS Driver ==============\n"); + + /*Sanity check the localities input */ + if(localities & ~TPM_TIS_EN_LOCLALL) { + printk("init_tpm_tis() Invalid locality specification! 
%X\n", localities); + goto abort_egress; + } + + printk("IOMEM Machine Base Address: %lX\n", baseaddr); + + /* Create the tpm data structure */ + tpm = malloc(sizeof(struct tpm_chip)); + __init_tpm_chip(tpm); + + /* Set the enabled localities - if 0 we leave default as all enabled */ + if(localities != 0) { + tpm->enabled_localities = localities; + } + printk("Enabled Localities: "); + for(i = 0; i < 5; ++i) { + if(locality_enabled(tpm, i)) { + printk("%d ", i); + } + } + printk("\n"); + + /* Set the base machine address */ + tpm->baseaddr = baseaddr; + + /* Set default timeouts */ + tpm->timeout_a = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_b = MILLISECS(TIS_LONG_TIMEOUT); + tpm->timeout_c = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_d = MILLISECS(TIS_SHORT_TIMEOUT); + + /*Map the mmio pages */ + addr = tpm->baseaddr; + for(i = 0; i < 5; ++i) { + if(locality_enabled(tpm, i)) { + /* Map the page in now */ + if((tpm->pages[i] = ioremap_nocache(addr, PAGE_SIZE)) == NULL) { + printk("Unable to map iomem page a address %lx\n", addr); + goto abort_egress; + } + + /* Set default locality to the first enabled one */ + if (tpm->locality < 0) { + if(tpm_tis_request_locality(tpm, i) < 0) { + printk("Unable to request locality %d??\n", i); + goto abort_egress; + } + } + } + addr += PAGE_SIZE; + } + + + /* Get the vendor and device ids */ + didvid = ioread32(TPM_DID_VID(tpm, tpm->locality)); + tpm->did = didvid >> 16; + tpm->vid = didvid & 0xFFFF; + + + /* Get the revision id */ + tpm->rid = ioread8(TPM_RID(tpm, tpm->locality)); + + printk("1.2 TPM (device-id=0x%X vendor-id = %X rev-id = %X)\n", tpm->did, tpm->vid, tpm->rid); + + intfcaps = ioread32(TPM_INTF_CAPS(tpm, tpm->locality)); + printk("TPM interface capabilities (0x%x):\n", intfcaps); + if (intfcaps & TPM_INTF_BURST_COUNT_STATIC) + printk("\tBurst Count Static\n"); + if (intfcaps & TPM_INTF_CMD_READY_INT) + printk("\tCommand Ready Int Support\n"); + if (intfcaps & TPM_INTF_INT_EDGE_FALLING) + printk("\tInterrupt Edge Falling\n"); + if (intfcaps & TPM_INTF_INT_EDGE_RISING) + printk("\tInterrupt Edge Rising\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_LOW) + printk("\tInterrupt Level Low\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_HIGH) + printk("\tInterrupt Level High\n"); + if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT) + printk("\tLocality Change Int Support\n"); + if (intfcaps & TPM_INTF_STS_VALID_INT) + printk("\tSts Valid Int Support\n"); + if (intfcaps & TPM_INTF_DATA_AVAIL_INT) + printk("\tData Avail Int Support\n"); + + /*Interupt setup */ + intmask = ioread32(TPM_INT_ENABLE(tpm, tpm->locality)); + + intmask |= TPM_INTF_CMD_READY_INT + | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT + | TPM_INTF_STS_VALID_INT; + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask); + + /*If interupts are enabled, handle it */ + if(irq) { + if(irq != TPM_PROBE_IRQ) { + tpm->irq = irq; + } else { + /*FIXME add irq probing feature later */ + printk("IRQ probing not implemented\n"); + } + } + + if(tpm->irq) { + iowrite8(TPM_INT_VECTOR(tpm, tpm->locality), tpm->irq); + + if(bind_pirq(tpm->irq, 1, tpm_tis_irq_handler, tpm) != 0) { + printk("Unabled to request irq: %u for use\n", tpm->irq); + printk("Will use polling mode\n"); + tpm->irq = 0; + } else { + /* Clear all existing */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), ioread32(TPM_INT_STATUS(tpm, tpm->locality))); + + /* Turn on interrupts */ + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask | TPM_GLOBAL_INT_ENABLE); + } + } + + if(tpm_get_timeouts(tpm)) { + printk("Could not get TPM 
timeouts and durations\n"); + goto abort_egress; + } + tpm_continue_selftest(tpm); + + + return tpm; +abort_egress: + if(tpm != NULL) { + shutdown_tpm_tis(tpm); + } + return NULL; +} + +void shutdown_tpm_tis(struct tpm_chip* tpm){ + int i; + + printk("Shutting down tpm_tis device\n"); + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), ~TPM_GLOBAL_INT_ENABLE); + + /*Unmap all of the mmio pages */ + for(i = 0; i < 5; ++i) { + if(tpm->pages[i] != NULL) { + iounmap(tpm->pages[i], PAGE_SIZE); + tpm->pages[i] = NULL; + } + } + free(tpm); + return; +} + + +int tpm_tis_cmd(struct tpm_chip* tpm, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen) +{ + if(tpm->locality < 0) { + printk("tpm_tis_cmd() failed! locality not set!\n"); + return -1; + } + if(reqlen > TPM_BUFSIZE) { + reqlen = TPM_BUFSIZE; + } + memcpy(tpm->data_buffer, req, reqlen); + *resplen = tpm_transmit(tpm, tpm->data_buffer, TPM_BUFSIZE); + + *resp = malloc(*resplen); + memcpy(*resp, tpm->data_buffer, *resplen); + return 0; +} + +#ifdef HAVE_LIBC +int tpm_tis_open(struct tpm_chip* tpm) +{ + /* Silently prevent multiple opens */ + if(tpm->fd != -1) { + return tpm->fd; + } + + tpm->fd = alloc_fd(FTYPE_TPM_TIS); + printk("tpm_tis_open() -> %d\n", tpm->fd); + files[tpm->fd].tpm_tis.dev = tpm; + files[tpm->fd].tpm_tis.offset = 0; + files[tpm->fd].tpm_tis.respgot = 0; + return tpm->fd; +} + +int tpm_tis_posix_write(int fd, const uint8_t* buf, size_t count) +{ + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + if(tpm->locality < 0) { + printk("tpm_tis_posix_write() failed! locality not set!\n"); + errno = EINPROGRESS; + return -1; + } + if(count == 0) { + return 0; + } + + /* Return an error if we are already processing a command */ + if(count > TPM_BUFSIZE) { + count = TPM_BUFSIZE; + } + /* Send the command now */ + memcpy(tpm->data_buffer, buf, count); + if((tpm->data_len = tpm_transmit(tpm, tpm->data_buffer, TPM_BUFSIZE)) < 0) { + errno = EIO; + return -1; + } + return count; +} + +int tpm_tis_posix_read(int fd, uint8_t* buf, size_t count) +{ + int rc; + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + if(count == 0) { + return 0; + } + + /* If there is no tpm resp to read, return EIO */ + if(tpm->data_len < 0) { + errno = EIO; + return -1; + } + + + /* Handle EOF case */ + if(files[fd].tpm_tis.offset >= tpm->data_len) { + rc = 0; + } else { + rc = min(tpm->data_len - files[fd].tpm_tis.offset, count); + memcpy(buf, tpm->data_buffer + files[fd].tpm_tis.offset, rc); + } + files[fd].tpm_tis.offset += rc; + /* Reset the data pending flag */ + return rc; +} +int tpm_tis_posix_fstat(int fd, struct stat* buf) +{ + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + buf->st_mode = O_RDWR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = be32_to_cpu(*((uint32_t*)(tpm->data_buffer + 2))); + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + return 0; +} + +/* TPM 2.0 */ + +/*TPM2.0 Selftest*/ +static void tpm2_selftest(struct tpm_chip* chip) +{ + uint8_t data[] = { + 0x80, 0x1, + 0x0, 0x0, 0x0, 0xb, + 0x0, 0x0, 0x1, 0x43, + 0x1, + }; + + tpm_transmit(chip, data, sizeof(data)); +} + +struct tpm_chip* init_tpm2_tis(unsigned long baseaddr, int localities, unsigned int irq) +{ + int i; + unsigned long addr; + struct tpm_chip* tpm = NULL; + uint32_t didvid; + uint32_t intfcaps; + uint32_t intmask; + + printk("============= Init TPM2 TIS Driver ==============\n"); + + /*Sanity check the localities input */ + if (localities & ~TPM_TIS_EN_LOCLALL) { + printk("init_tpm2_tis Invalid locality specification! 
%X\n", localities); + goto abort_egress; + } + + printk("IOMEM Machine Base Address: %lX\n", baseaddr); + + /* Create the tpm data structure */ + tpm = malloc(sizeof(struct tpm_chip)); + __init_tpm_chip(tpm); + + /* Set the enabled localities - if 0 we leave default as all enabled */ + if (localities != 0) { + tpm->enabled_localities = localities; + } + printk("Enabled Localities: "); + for (i = 0; i < 5; ++i) { + if (locality_enabled(tpm, i)) { + printk("%d ", i); + } + } + printk("\n"); + + /* Set the base machine address */ + tpm->baseaddr = baseaddr; + + /* Set default timeouts */ + tpm->timeout_a = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_b = MILLISECS(TIS_LONG_TIMEOUT); + tpm->timeout_c = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_d = MILLISECS(TIS_SHORT_TIMEOUT); + + /*Map the mmio pages */ + addr = tpm->baseaddr; + for (i = 0; i < 5; ++i) { + if (locality_enabled(tpm, i)) { + + /* Map the page in now */ + if ((tpm->pages[i] = ioremap_nocache(addr, PAGE_SIZE)) == NULL) { + printk("Unable to map iomem page a address %lx\n", addr); + goto abort_egress; + } + + /* Set default locality to the first enabled one */ + if (tpm->locality < 0) { + if (tpm_tis_request_locality(tpm, i) < 0) { + printk("Unable to request locality %d??\n", i); + goto abort_egress; + } + } + } + addr += PAGE_SIZE; + } + + /* Get the vendor and device ids */ + didvid = ioread32(TPM_DID_VID(tpm, tpm->locality)); + tpm->did = didvid >> 16; + tpm->vid = didvid & 0xFFFF; + + /* Get the revision id */ + tpm->rid = ioread8(TPM_RID(tpm, tpm->locality)); + printk("2.0 TPM (device-id=0x%X vendor-id = %X rev-id = %X)\n", + tpm->did, tpm->vid, tpm->rid); + + intfcaps = ioread32(TPM_INTF_CAPS(tpm, tpm->locality)); + printk("TPM interface capabilities (0x%x):\n", intfcaps); + if (intfcaps & TPM_INTF_BURST_COUNT_STATIC) + printk("\tBurst Count Static\n"); + if (intfcaps & TPM_INTF_CMD_READY_INT) + printk("\tCommand Ready Int Support\n"); + if (intfcaps & TPM_INTF_INT_EDGE_FALLING) + printk("\tInterrupt Edge Falling\n"); + if (intfcaps & TPM_INTF_INT_EDGE_RISING) + printk("\tInterrupt Edge Rising\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_LOW) + printk("\tInterrupt Level Low\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_HIGH) + printk("\tInterrupt Level High\n"); + if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT) + printk("\tLocality Change Int Support\n"); + if (intfcaps & TPM_INTF_STS_VALID_INT) + printk("\tSts Valid Int Support\n"); + if (intfcaps & TPM_INTF_DATA_AVAIL_INT) + printk("\tData Avail Int Support\n"); + + /*Interupt setup */ + intmask = ioread32(TPM_INT_ENABLE(tpm, tpm->locality)); + + intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT | + TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT; + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask); + + /*If interupts are enabled, handle it */ + if (irq) { + if (irq != TPM_PROBE_IRQ) { + tpm->irq = irq; + } else { + /*FIXME add irq probing feature later */ + printk("IRQ probing not implemented\n"); + } + } + + if (tpm->irq) { + iowrite8(TPM_INT_VECTOR(tpm, tpm->locality), tpm->irq); + if (bind_pirq(tpm->irq, 1, tpm_tis_irq_handler, tpm) != 0) { + printk("Unabled to request irq: %u for use\n", tpm->irq); + printk("Will use polling mode\n"); + tpm->irq = 0; + } else { + + /* Clear all existing */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), + ioread32(TPM_INT_STATUS(tpm, tpm->locality))); + + /* Turn on interrupts */ + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), + intmask | TPM_GLOBAL_INT_ENABLE); + } + } + + tpm2_selftest(tpm); + return tpm; + 
+abort_egress: + if (tpm != NULL) { + shutdown_tpm_tis(tpm); + } + return NULL; +} +#endif diff -Nru xen-4.9.0/extras/mini-os/.travis.yml xen-4.9.2/extras/mini-os/.travis.yml --- xen-4.9.0/extras/mini-os/.travis.yml 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/.travis.yml 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,25 @@ +language: c +dist: trusty +sudo: required +# don't test stable branches +branches: + except: + - /^stable-.*/ +matrix: + include: + - compiler: gcc +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - libc6-dev-i386 + - gcc-5 + - g++-5 +# we must set CXX manually instead of using 'language: cpp' due to +# travis-ci/travis-ci#3871 +before_script: + - export CXX=${CC/cc/++} + - export CXX=${CXX/clang/clang++} +script: + - ./scripts/travis-build diff -Nru xen-4.9.0/extras/mini-os/xenbus/xenbus.c xen-4.9.2/extras/mini-os/xenbus/xenbus.c --- xen-4.9.0/extras/mini-os/xenbus/xenbus.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/extras/mini-os/xenbus/xenbus.c 2017-02-22 13:09:16.000000000 +0000 @@ -0,0 +1,897 @@ +/* + **************************************************************************** + * (C) 2006 - Cambridge University + **************************************************************************** + * + * File: xenbus.c + * Author: Steven Smith (sos22@cam.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Changes: John D. Ramsdell + * + * Date: Jun 2006, chages Aug 2005 + * + * Environment: Xen Minimal OS + * Description: Minimal implementation of xenbus + * + **************************************************************************** + **/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define min(x,y) ({ \ + typeof(x) tmpx = (x); \ + typeof(y) tmpy = (y); \ + tmpx < tmpy ? tmpx : tmpy; \ + }) + +#ifdef XENBUS_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=xenbus.c, line=%d) " _f , __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
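/*
 * With XENBUS_DEBUG unset the macro expands to a no-op, so call sites
 * such as DEBUG("Rsp_cons %d, rsp_prod %d.\n", ...) in
 * xenbus_thread_func() below compile away entirely; defining
 * XENBUS_DEBUG turns each of them into a printk() tagged with the
 * source line.
 */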
((void)0) +#endif + +static struct xenstore_domain_interface *xenstore_buf; +static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); +DECLARE_WAIT_QUEUE_HEAD(xenbus_watch_queue); + +xenbus_event_queue xenbus_events; +static struct watch { + char *token; + xenbus_event_queue *events; + struct watch *next; +} *watches; +struct xenbus_req_info +{ + int in_use:1; + struct wait_queue_head waitq; + void *reply; +}; + +#define NR_REQS 32 +static struct xenbus_req_info req_info[NR_REQS]; + +uint32_t xenbus_evtchn; + +#ifdef CONFIG_PARAVIRT +void get_xenbus(void *p) +{ + start_info_t *si = p; + + xenbus_evtchn = si->store_evtchn; + xenstore_buf = mfn_to_virt(si->store_mfn); +} +#else +void get_xenbus(void *p) +{ + uint64_t v; + + if ( hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v) ) + BUG(); + xenbus_evtchn = v; + + if( hvm_get_parameter(HVM_PARAM_STORE_PFN, &v) ) + BUG(); + xenstore_buf = (struct xenstore_domain_interface *)map_frame_virt(v); +} +#endif + +static void memcpy_from_ring(const void *Ring, + void *Dest, + int off, + int len) +{ + int c1, c2; + const char *ring = Ring; + char *dest = Dest; + c1 = min(len, XENSTORE_RING_SIZE - off); + c2 = len - c1; + memcpy(dest, ring + off, c1); + memcpy(dest + c1, ring, c2); +} + +char **xenbus_wait_for_watch_return(xenbus_event_queue *queue) +{ + struct xenbus_event *event; + DEFINE_WAIT(w); + if (!queue) + queue = &xenbus_events; + while (!(event = *queue)) { + add_waiter(w, xenbus_watch_queue); + schedule(); + } + remove_waiter(w, xenbus_watch_queue); + *queue = event->next; + return &event->path; +} + +void xenbus_wait_for_watch(xenbus_event_queue *queue) +{ + char **ret; + if (!queue) + queue = &xenbus_events; + ret = xenbus_wait_for_watch_return(queue); + if (ret) + free(ret); + else + printk("unexpected path returned by watch\n"); +} + +char* xenbus_wait_for_value(const char* path, const char* value, xenbus_event_queue *queue) +{ + if (!queue) + queue = &xenbus_events; + for(;;) + { + char *res, *msg; + int r; + + msg = xenbus_read(XBT_NIL, path, &res); + if(msg) return msg; + + r = strcmp(value,res); + free(res); + + if(r==0) break; + else xenbus_wait_for_watch(queue); + } + return NULL; +} + +char *xenbus_switch_state(xenbus_transaction_t xbt, const char* path, XenbusState state) +{ + char *current_state; + char *msg = NULL; + char *msg2 = NULL; + char value[2]; + XenbusState rs; + int xbt_flag = 0; + int retry = 0; + + do { + if (xbt == XBT_NIL) { + msg = xenbus_transaction_start(&xbt); + if (msg) goto exit; + xbt_flag = 1; + } + + msg = xenbus_read(xbt, path, ¤t_state); + if (msg) goto exit; + + rs = (XenbusState) (current_state[0] - '0'); + free(current_state); + if (rs == state) { + msg = NULL; + goto exit; + } + + snprintf(value, 2, "%d", state); + msg = xenbus_write(xbt, path, value); + +exit: + if (xbt_flag) { + msg2 = xenbus_transaction_end(xbt, 0, &retry); + xbt = XBT_NIL; + } + if (msg == NULL && msg2 != NULL) + msg = msg2; + } while (retry); + + return msg; +} + +char *xenbus_wait_for_state_change(const char* path, XenbusState *state, xenbus_event_queue *queue) +{ + if (!queue) + queue = &xenbus_events; + for(;;) + { + char *res, *msg; + XenbusState rs; + + msg = xenbus_read(XBT_NIL, path, &res); + if(msg) return msg; + + rs = (XenbusState) (res[0] - 48); + free(res); + + if (rs == *state) + xenbus_wait_for_watch(queue); + else { + *state = rs; + break; + } + } + return NULL; +} + + +static void xenbus_thread_func(void *ign) +{ + struct xsd_sockmsg msg; + unsigned prod = xenstore_buf->rsp_prod; + + for (;;) + { + wait_event(xb_waitq, prod != 
xenstore_buf->rsp_prod); + while (1) + { + prod = xenstore_buf->rsp_prod; + DEBUG("Rsp_cons %d, rsp_prod %d.\n", xenstore_buf->rsp_cons, + xenstore_buf->rsp_prod); + if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg)) + break; + rmb(); + memcpy_from_ring(xenstore_buf->rsp, + &msg, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + sizeof(msg)); + DEBUG("Msg len %d, %d avail, id %d.\n", + msg.len + sizeof(msg), + xenstore_buf->rsp_prod - xenstore_buf->rsp_cons, + msg.req_id); + if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < + sizeof(msg) + msg.len) + break; + + DEBUG("Message is good.\n"); + + if(msg.type == XS_WATCH_EVENT) + { + struct xenbus_event *event = malloc(sizeof(*event) + msg.len); + xenbus_event_queue *events = NULL; + char *data = (char*)event + sizeof(*event); + struct watch *watch; + + memcpy_from_ring(xenstore_buf->rsp, + data, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons + sizeof(msg)), + msg.len); + + event->path = data; + event->token = event->path + strlen(event->path) + 1; + + mb(); + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + + for (watch = watches; watch; watch = watch->next) + if (!strcmp(watch->token, event->token)) { + events = watch->events; + break; + } + + if (events) { + event->next = *events; + *events = event; + wake_up(&xenbus_watch_queue); + } else { + printk("unexpected watch token %s\n", event->token); + free(event); + } + } + + else + { + req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len); + memcpy_from_ring(xenstore_buf->rsp, + req_info[msg.req_id].reply, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + msg.len + sizeof(msg)); + mb(); + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + wake_up(&req_info[msg.req_id].waitq); + } + + wmb(); + notify_remote_via_evtchn(xenbus_evtchn); + } + } +} + +static void xenbus_evtchn_handler(evtchn_port_t port, struct pt_regs *regs, + void *ign) +{ + wake_up(&xb_waitq); +} + +static int nr_live_reqs; +static DEFINE_SPINLOCK(req_lock); +static DECLARE_WAIT_QUEUE_HEAD(req_wq); + +/* Release a xenbus identifier */ +static void release_xenbus_id(int id) +{ + BUG_ON(!req_info[id].in_use); + spin_lock(&req_lock); + req_info[id].in_use = 0; + nr_live_reqs--; + req_info[id].in_use = 0; + if (nr_live_reqs == NR_REQS - 1) + wake_up(&req_wq); + spin_unlock(&req_lock); +} + +/* Allocate an identifier for a xenbus request. Blocks if none are + available. */ +static int allocate_xenbus_id(void) +{ + static int probe; + int o_probe; + + while (1) + { + spin_lock(&req_lock); + if (nr_live_reqs < NR_REQS) + break; + spin_unlock(&req_lock); + wait_event(req_wq, (nr_live_reqs < NR_REQS)); + } + + o_probe = probe; + for (;;) + { + if (!req_info[o_probe].in_use) + break; + o_probe = (o_probe + 1) % NR_REQS; + BUG_ON(o_probe == probe); + } + nr_live_reqs++; + req_info[o_probe].in_use = 1; + probe = (o_probe + 1) % NR_REQS; + spin_unlock(&req_lock); + init_waitqueue_head(&req_info[o_probe].waitq); + + return o_probe; +} + +/* Initialise xenbus. */ +void init_xenbus(void) +{ + int err; + DEBUG("init_xenbus called.\n"); + create_thread("xenstore", xenbus_thread_func, NULL); + DEBUG("buf at %p.\n", xenstore_buf); + err = bind_evtchn(xenbus_evtchn, xenbus_evtchn_handler, NULL); + unmask_evtchn(xenbus_evtchn); + printk("xenbus initialised on irq %d\n", err); +} + +void fini_xenbus(void) +{ +} + +/* Send data to xenbus. This can block. All of the requests are seen + by xenbus as if sent atomically. The header is added + automatically, using type %type, req_id %req_id, and trans_id + %trans_id. 
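
   The header prepended here is the xsd_sockmsg from xen/io/xs_wire.h;
   for orientation, its canonical layout is:

       struct xsd_sockmsg {
           uint32_t type;    // XS_READ, XS_WRITE, XS_WATCH, ...
           uint32_t req_id;  // echoed verbatim in the reply
           uint32_t tx_id;   // transaction id, 0 outside a transaction
           uint32_t len;     // length of the payload after this header
       };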
*/ +static void xb_write(int type, int req_id, xenbus_transaction_t trans_id, + const struct write_req *req, int nr_reqs) +{ + XENSTORE_RING_IDX prod; + int r; + int len = 0; + const struct write_req *cur_req; + int req_off; + int total_off; + int this_chunk; + struct xsd_sockmsg m = {.type = type, .req_id = req_id, + .tx_id = trans_id }; + struct write_req header_req = { &m, sizeof(m) }; + + for (r = 0; r < nr_reqs; r++) + len += req[r].len; + m.len = len; + len += sizeof(m); + + cur_req = &header_req; + + BUG_ON(len > XENSTORE_RING_SIZE); + /* Wait for the ring to drain to the point where we can send the + message. */ + prod = xenstore_buf->req_prod; + if (prod + len - xenstore_buf->req_cons > XENSTORE_RING_SIZE) + { + /* Wait for there to be space on the ring */ + DEBUG("prod %d, len %d, cons %d, size %d; waiting.\n", + prod, len, xenstore_buf->req_cons, XENSTORE_RING_SIZE); + wait_event(xb_waitq, + xenstore_buf->req_prod + len - xenstore_buf->req_cons <= + XENSTORE_RING_SIZE); + DEBUG("Back from wait.\n"); + prod = xenstore_buf->req_prod; + } + + /* We're now guaranteed to be able to send the message without + overflowing the ring. Do so. */ + total_off = 0; + req_off = 0; + while (total_off < len) + { + this_chunk = min(cur_req->len - req_off, + XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod)); + memcpy((char *)xenstore_buf->req + MASK_XENSTORE_IDX(prod), + (char *)cur_req->data + req_off, this_chunk); + prod += this_chunk; + req_off += this_chunk; + total_off += this_chunk; + if (req_off == cur_req->len) + { + req_off = 0; + if (cur_req == &header_req) + cur_req = req; + else + cur_req++; + } + } + + DEBUG("Complete main loop of xb_write.\n"); + BUG_ON(req_off != 0); + BUG_ON(total_off != len); + BUG_ON(prod > xenstore_buf->req_cons + XENSTORE_RING_SIZE); + + /* Remote must see entire message before updating indexes */ + wmb(); + + xenstore_buf->req_prod += len; + + /* Send evtchn to notify remote */ + notify_remote_via_evtchn(xenbus_evtchn); +} + +/* Send a mesasge to xenbus, in the same fashion as xb_write, and + block waiting for a reply. The reply is malloced and should be + freed by the caller. */ +struct xsd_sockmsg * +xenbus_msg_reply(int type, + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs) +{ + int id; + DEFINE_WAIT(w); + struct xsd_sockmsg *rep; + + id = allocate_xenbus_id(); + add_waiter(w, req_info[id].waitq); + + xb_write(type, id, trans, io, nr_reqs); + + schedule(); + remove_waiter(w, req_info[id].waitq); + wake(current); + + rep = req_info[id].reply; + BUG_ON(rep->req_id != id); + release_xenbus_id(id); + return rep; +} + +static char *errmsg(struct xsd_sockmsg *rep) +{ + char *res; + if (!rep) { + char msg[] = "No reply"; + size_t len = strlen(msg) + 1; + return memcpy(malloc(len), msg, len); + } + if (rep->type != XS_ERROR) + return NULL; + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + return res; +} + +/* Send a debug message to xenbus. Can block. */ +static void xenbus_debug_msg(const char *msg) +{ + int len = strlen(msg); + struct write_req req[] = { + { "print", sizeof("print") }, + { msg, len }, + { "", 1 }}; + struct xsd_sockmsg *reply; + + reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req)); + printk("Got a reply, type %d, id %d, len %d.\n", + reply->type, reply->req_id, reply->len); +} + +/* List the contents of a directory. Returns a malloc()ed array of + pointers to malloc()ed strings. The array is NULL terminated. May + block. 
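
   Typical usage, mirroring do_ls_test() later in this file:

       char **dirs, *msg;
       int x;

       msg = xenbus_ls(XBT_NIL, "device", &dirs);
       if (msg) {
           free(msg);         // msg is a malloc()ed error string
           return;
       }
       for (x = 0; dirs[x]; x++)
           free(dirs[x]);     // each entry is individually malloc()ed
       free(dirs);            // then free the array itself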
*/ +char *xenbus_ls(xenbus_transaction_t xbt, const char *pre, char ***contents) +{ + struct xsd_sockmsg *reply, *repmsg; + struct write_req req[] = { { pre, strlen(pre)+1 } }; + int nr_elems, x, i; + char **res, *msg; + + repmsg = xenbus_msg_reply(XS_DIRECTORY, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(repmsg); + if (msg) { + *contents = NULL; + return msg; + } + reply = repmsg + 1; + for (x = nr_elems = 0; x < repmsg->len; x++) + nr_elems += (((char *)reply)[x] == 0); + res = malloc(sizeof(res[0]) * (nr_elems + 1)); + for (x = i = 0; i < nr_elems; i++) { + int l = strlen((char *)reply + x); + res[i] = malloc(l + 1); + memcpy(res[i], (char *)reply + x, l + 1); + x += l + 1; + } + res[i] = NULL; + free(repmsg); + *contents = res; + return NULL; +} + +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + rep = xenbus_msg_reply(XS_READ, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value) +{ + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, strlen(value)}, + }; + struct xsd_sockmsg *rep; + char *msg; + rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) return msg; + free(rep); + return NULL; +} + +char* xenbus_watch_path_token( xenbus_transaction_t xbt, const char *path, const char *token, xenbus_event_queue *events) +{ + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {token, strlen(token) + 1}, + }; + + struct watch *watch = malloc(sizeof(*watch)); + + char *msg; + + if (!events) + events = &xenbus_events; + + watch->token = strdup(token); + watch->events = events; + watch->next = watches; + watches = watch; + + rep = xenbus_msg_reply(XS_WATCH, xbt, req, ARRAY_SIZE(req)); + + msg = errmsg(rep); + if (msg) return msg; + free(rep); + + return NULL; +} + +char* xenbus_unwatch_path_token( xenbus_transaction_t xbt, const char *path, const char *token) +{ + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {token, strlen(token) + 1}, + }; + + struct watch *watch, **prev; + + char *msg; + + rep = xenbus_msg_reply(XS_UNWATCH, xbt, req, ARRAY_SIZE(req)); + + msg = errmsg(rep); + if (msg) return msg; + free(rep); + + for (prev = &watches, watch = *prev; watch; prev = &watch->next, watch = *prev) + if (!strcmp(watch->token, token)) { + free(watch->token); + *prev = watch->next; + free(watch); + break; + } + + return NULL; +} + +char *xenbus_rm(xenbus_transaction_t xbt, const char *path) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *msg; + rep = xenbus_msg_reply(XS_RM, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + rep = xenbus_msg_reply(XS_GET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +#define 
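/*
 * A note on the encoding used by xenbus_set_perms() just below: the
 * value written is the usual xenstore permission string, a single
 * letter ('n' none, 'r' read, 'w' write, 'b' read/write) followed by
 * the domid in decimal. For example (path illustrative):
 *
 *     msg = xenbus_set_perms(XBT_NIL, "device/vif/0", 0, 'r');
 *
 * writes the string "r0"; PERM_MAX_SIZE merely bounds that string.
 */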
PERM_MAX_SIZE 32 +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm) +{ + char value[PERM_MAX_SIZE]; + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, 0}, + }; + struct xsd_sockmsg *rep; + char *msg; + snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom); + req[1].len = strlen(value) + 1; + rep = xenbus_msg_reply(XS_SET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_transaction_start(xenbus_transaction_t *xbt) +{ + /* xenstored becomes angry if you send a length 0 message, so just + shove a nul terminator on the end */ + struct write_req req = { "", 1}; + struct xsd_sockmsg *rep; + char *err; + + rep = xenbus_msg_reply(XS_TRANSACTION_START, 0, &req, 1); + err = errmsg(rep); + if (err) + return err; + sscanf((char *)(rep + 1), "%lu", xbt); + free(rep); + return NULL; +} + +char * +xenbus_transaction_end(xenbus_transaction_t t, int abort, int *retry) +{ + struct xsd_sockmsg *rep; + struct write_req req; + char *err; + + *retry = 0; + + req.data = abort ? "F" : "T"; + req.len = 2; + rep = xenbus_msg_reply(XS_TRANSACTION_END, t, &req, 1); + err = errmsg(rep); + if (err) { + if (!strcmp(err, "EAGAIN")) { + *retry = 1; + free(err); + return NULL; + } else { + return err; + } + } + free(rep); + return NULL; +} + +int xenbus_read_integer(const char *path) +{ + char *res, *buf; + int t; + + res = xenbus_read(XBT_NIL, path, &buf); + if (res) { + printk("Failed to read %s.\n", path); + free(res); + return -1; + } + sscanf(buf, "%d", &t); + free(buf); + return t; +} + +int xenbus_read_uuid(const char* path, unsigned char uuid[16]) { + char * res, *buf; + res = xenbus_read(XBT_NIL, path, &buf); + if(res) { + printk("Failed to read %s.\n", path); + free(res); + return 0; + } + if(strlen(buf) != ((2*16)+4) /* 16 hex bytes and 4 hyphens */ + || sscanf(buf, + "%2hhx%2hhx%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", + uuid, uuid + 1, uuid + 2, uuid + 3, + uuid + 4, uuid + 5, uuid + 6, uuid + 7, + uuid + 8, uuid + 9, uuid + 10, uuid + 11, + uuid + 12, uuid + 13, uuid + 14, uuid + 15) != 16) { + printk("Xenbus path %s value %s is not a uuid!\n", path, buf); + free(buf); + return 0; + } + free(buf); + return 1; +} + +char* xenbus_printf(xenbus_transaction_t xbt, + const char* node, const char* path, + const char* fmt, ...) 
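/*
 * Convenience wrapper: formats "node/path" and a printf-style value
 * into fixed 256-byte buffers and hands them to xenbus_write(). A
 * typical call (node and value illustrative):
 *
 *     xenbus_printf(XBT_NIL, nodename, "state", "%u", XenbusStateConnected);
 *
 * Note that only the combined node/path length is guarded by the
 * BUG_ON() below; the formatted value is trusted to fit in BUFFER_SIZE.
 */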
+{ +#define BUFFER_SIZE 256 + char fullpath[BUFFER_SIZE]; + char val[BUFFER_SIZE]; + va_list args; + + BUG_ON(strlen(node) + strlen(path) + 1 >= BUFFER_SIZE); + sprintf(fullpath,"%s/%s", node, path); + va_start(args, fmt); + vsprintf(val, fmt, args); + va_end(args); + return xenbus_write(xbt,fullpath,val); +} + +domid_t xenbus_get_self_id(void) +{ + char *dom_id; + domid_t ret; + + BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id)); + sscanf(dom_id, "%"SCNd16, &ret); + + return ret; +} + +static void do_ls_test(const char *pre) +{ + char **dirs, *msg; + int x; + + printk("ls %s...\n", pre); + msg = xenbus_ls(XBT_NIL, pre, &dirs); + if (msg) { + printk("Error in xenbus ls: %s\n", msg); + free(msg); + return; + } + for (x = 0; dirs[x]; x++) + { + printk("ls %s[%d] -> %s\n", pre, x, dirs[x]); + free(dirs[x]); + } + free(dirs); +} + +static void do_read_test(const char *path) +{ + char *res, *msg; + printk("Read %s...\n", path); + msg = xenbus_read(XBT_NIL, path, &res); + if (msg) { + printk("Error in xenbus read: %s\n", msg); + free(msg); + return; + } + printk("Read %s -> %s.\n", path, res); + free(res); +} + +static void do_write_test(const char *path, const char *val) +{ + char *msg; + printk("Write %s to %s...\n", val, path); + msg = xenbus_write(XBT_NIL, path, val); + if (msg) { + printk("Result %s\n", msg); + free(msg); + } else { + printk("Success.\n"); + } +} + +static void do_rm_test(const char *path) +{ + char *msg; + printk("rm %s...\n", path); + msg = xenbus_rm(XBT_NIL, path); + if (msg) { + printk("Result %s\n", msg); + free(msg); + } else { + printk("Success.\n"); + } +} + +/* Simple testing thing */ +void test_xenbus(void) +{ + printk("Doing xenbus test.\n"); + xenbus_debug_msg("Testing xenbus...\n"); + + printk("Doing ls test.\n"); + do_ls_test("device"); + do_ls_test("device/vif"); + do_ls_test("device/vif/0"); + + printk("Doing read test.\n"); + do_read_test("device/vif/0/mac"); + do_read_test("device/vif/0/backend"); + + printk("Doing write test.\n"); + do_write_test("device/vif/0/flibble", "flobble"); + do_read_test("device/vif/0/flibble"); + do_write_test("device/vif/0/flibble", "widget"); + do_read_test("device/vif/0/flibble"); + + printk("Doing rm test.\n"); + do_rm_test("device/vif/0/flibble"); + do_read_test("device/vif/0/flibble"); + printk("(Should have said ENOENT)\n"); +} + +/* + * Local variables: + * mode: C + * c-basic-offset: 4 + * End: + */ diff -Nru xen-4.9.0/.gitarchive-info xen-4.9.2/.gitarchive-info --- xen-4.9.0/.gitarchive-info 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/.gitarchive-info 2018-03-28 13:10:55.000000000 +0000 @@ -1,2 +1,2 @@ -Changeset: c30bf55594a53fae8aae08aabf16fc192faad7da -Commit date: Tue, 27 Jun 2017 19:13:19 +0100 +Changeset: ad4fefdd088e47dcc017efefc4857e1610c832af +Commit date: Wed, 28 Mar 2018 15:10:55 +0200 diff -Nru xen-4.9.0/MAINTAINERS xen-4.9.2/MAINTAINERS --- xen-4.9.0/MAINTAINERS 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/MAINTAINERS 2018-03-28 13:10:55.000000000 +0000 @@ -50,9 +50,16 @@ information. Backport requests should be made on the xen-devel@lists.xenproject.org -list. Remember to copy the appropriate stable branch maintainer who -will be listed in this section of the MAINTAINERS file in the -appropriate branch. +list. Remember to copy the appropriate stable branch maintainer. 
+ +The maintainer for this branch is: + + Jan Beulich + +Tools backport requests should also be copied to: + + Ian Jackson + Unstable Subsystem Maintainers ============================== diff -Nru xen-4.9.0/tools/firmware/hvmloader/pci.c xen-4.9.2/tools/firmware/hvmloader/pci.c --- xen-4.9.0/tools/firmware/hvmloader/pci.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/firmware/hvmloader/pci.c 2018-03-28 13:10:55.000000000 +0000 @@ -84,7 +84,6 @@ uint32_t vga_devfn = 256; uint16_t class, vendor_id, device_id; unsigned int bar, pin, link, isa_irq; - int next_rmrr; /* Resources assignable to PCI devices via BARs. */ struct resource { @@ -403,8 +402,6 @@ io_resource.base = 0xc000; io_resource.max = 0x10000; - next_rmrr = find_next_rmrr(pci_mem_start); - /* Assign iomem and ioport resources in descending order of size. */ for ( i = 0; i < nr_bars; i++ ) { @@ -462,15 +459,20 @@ base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1); /* If we're using mem_resource, check for RMRR conflicts. */ - while ( resource == &mem_resource && - next_rmrr >= 0 && - check_overlap(base, bar_sz, + if ( resource == &mem_resource) + { + int next_rmrr = find_next_rmrr(base); + + while ( next_rmrr >= 0 && + check_overlap(base, bar_sz, memory_map.map[next_rmrr].addr, memory_map.map[next_rmrr].size) ) - { - base = memory_map.map[next_rmrr].addr + memory_map.map[next_rmrr].size; - base = (base + bar_sz - 1) & ~(bar_sz - 1); - next_rmrr = find_next_rmrr(base); + { + base = memory_map.map[next_rmrr].addr + + memory_map.map[next_rmrr].size; + base = (base + bar_sz - 1) & ~(bar_sz - 1); + next_rmrr = find_next_rmrr(base); + } } bar_data |= (uint32_t)base; diff -Nru xen-4.9.0/tools/firmware/rombios/32bit/Makefile xen-4.9.2/tools/firmware/rombios/32bit/Makefile --- xen-4.9.0/tools/firmware/rombios/32bit/Makefile 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/firmware/rombios/32bit/Makefile 2018-03-28 13:10:55.000000000 +0000 @@ -4,6 +4,8 @@ TARGET = 32bitbios_flat.h CFLAGS += $(CFLAGS_xeninclude) -I.. -I../../../libacpi +$(call cc-option-add,CFLAGS,CC,-fno-pic) +$(call cc-option-add,CFLAGS,CC,-fno-PIE) SUBDIRS = tcgbios diff -Nru xen-4.9.0/tools/firmware/rombios/32bit/tcgbios/Makefile xen-4.9.2/tools/firmware/rombios/32bit/tcgbios/Makefile --- xen-4.9.0/tools/firmware/rombios/32bit/tcgbios/Makefile 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/firmware/rombios/32bit/tcgbios/Makefile 2018-03-28 13:10:55.000000000 +0000 @@ -4,6 +4,8 @@ TARGET = tcgbiosext.o CFLAGS += $(CFLAGS_xeninclude) -I.. -I../.. -I../../../../libacpi +$(call cc-option-add,CFLAGS,CC,-fno-pic) +$(call cc-option-add,CFLAGS,CC,-fno-PIE) .PHONY: all all: $(TARGET) diff -Nru xen-4.9.0/tools/fuzz/x86_instruction_emulator/afl-harness.c xen-4.9.2/tools/fuzz/x86_instruction_emulator/afl-harness.c --- xen-4.9.0/tools/fuzz/x86_instruction_emulator/afl-harness.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/fuzz/x86_instruction_emulator/afl-harness.c 2018-03-28 13:10:55.000000000 +0000 @@ -77,6 +77,17 @@ exit(-1); } } +#ifdef __AFL_HAVE_MANUAL_CONTROL + else + { + /* + * This will ensure we're dealing with a clean stream + * state after the afl-fuzz process messes with the open + * file handle. 
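
   (With manual control the harness is normally driven by AFL's
   persistent-mode loop, along these lines; this is a sketch, not the
   exact harness code:

       while (__AFL_LOOP(1000)) {
           size = fread(input, 1, INPUT_SIZE, fp);
           LLVMFuzzerTestOneInput(input, size);
           fseek(fp, 0, SEEK_SET);
       }

   so rewinding the handle guarantees that every iteration parses the
   file from offset 0.)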
+ */ + fseek(fp, 0, SEEK_SET); + } +#endif size = fread(input, 1, INPUT_SIZE, fp); diff -Nru xen-4.9.0/tools/fuzz/x86_instruction_emulator/fuzz-emul.c xen-4.9.2/tools/fuzz/x86_instruction_emulator/fuzz-emul.c --- xen-4.9.0/tools/fuzz/x86_instruction_emulator/fuzz-emul.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/fuzz/x86_instruction_emulator/fuzz-emul.c 2018-03-28 13:10:55.000000000 +0000 @@ -139,7 +139,18 @@ struct x86_emulate_ctxt *ctxt) { /* Reads expected for all user and system segments. */ - assert(is_x86_user_segment(seg) || is_x86_system_segment(seg)); + if ( is_x86_user_segment(seg) ) + assert(ctxt->addr_size == 64 || !(offset >> 32)); + else if ( seg == x86_seg_tr ) + /* + * The TSS is special in that accesses below the segment base are + * possible, as the Interrupt Redirection Bitmap starts 32 bytes + * ahead of the I/O Bitmap, regardless of the value of the latter. + */ + assert((long)offset < 0 ? (long)offset > -32 : !(offset >> 17)); + else + assert(is_x86_system_segment(seg) && + (ctxt->lma ? offset <= 0x10007 : !(offset >> 16))); return data_read(ctxt, seg, "read", p_data, bytes); } @@ -162,6 +173,13 @@ { assert(seg == x86_seg_cs); + /* Minimal segment limit checking, until full one is being put in place. */ + if ( ctxt->addr_size < 64 && (offset >> 32) ) + { + x86_emul_hw_exception(13, 0, ctxt); + return X86EMUL_EXCEPTION; + } + /* * Zero-length instruction fetches are made at the destination of jumps, * to perform segmentation checks. No data needs returning. @@ -232,6 +250,7 @@ struct x86_emulate_ctxt *ctxt) { assert(dst_seg == x86_seg_es); + assert(ctxt->addr_size == 64 || !(dst_offset >> 32)); return _fuzz_rep_read(ctxt, "rep_ins", reps); } @@ -247,6 +266,7 @@ { assert(is_x86_user_segment(src_seg)); assert(dst_seg == x86_seg_es); + assert(ctxt->addr_size == 64 || !((src_offset | dst_offset) >> 32)); return _fuzz_rep_read(ctxt, "rep_movs", reps); } @@ -260,6 +280,7 @@ struct x86_emulate_ctxt *ctxt) { assert(is_x86_user_segment(src_seg)); + assert(ctxt->addr_size == 64 || !(src_offset >> 32)); return _fuzz_rep_write(ctxt, "rep_outs", reps); } @@ -277,6 +298,7 @@ * for CLZERO. */ assert(is_x86_user_segment(seg)); + assert(ctxt->addr_size == 64 || !(offset >> 32)); return _fuzz_rep_write(ctxt, "rep_stos", reps); } @@ -290,6 +312,7 @@ { /* Writes not expected for any system segments. */ assert(is_x86_user_segment(seg)); + assert(ctxt->addr_size == 64 || !(offset >> 32)); return maybe_fail(ctxt, "write", true); } @@ -306,8 +329,10 @@ * Cmpxchg expected for user segments, and setting accessed/busy bits in * GDT/LDT enties, but not expected for any IDT or TR accesses. */ - assert(is_x86_user_segment(seg) || - seg == x86_seg_gdtr || seg == x86_seg_ldtr); + if ( is_x86_user_segment(seg) ) + assert(ctxt->addr_size == 64 || !(offset >> 32)); + else + assert((seg == x86_seg_gdtr || seg == x86_seg_ldtr) && !(offset >> 16)); return maybe_fail(ctxt, "cmpxchg", true); } @@ -319,6 +344,7 @@ { /* invlpg(), unlike all other hooks, may be called with x86_seg_none. 
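
   The offset checks added throughout this file all follow one pattern:
   outside 64-bit mode a linear address must fit in 32 bits, so each
   hook asserts

       assert(ctxt->addr_size == 64 || !(offset >> 32));

   i.e. an offset with any of bits 63:32 set is only legitimate when
   the emulated context is 64-bit.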
*/ assert(is_x86_user_segment(seg) || seg == x86_seg_none); + assert(ctxt->addr_size == 64 || !(offset >> 32)); return maybe_fail(ctxt, "invlpg", false); } @@ -759,13 +785,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) { - struct cpu_user_regs regs = {}; struct fuzz_state state = { .ops = all_fuzzer_ops, }; struct x86_emulate_ctxt ctxt = { .data = &state, - .regs = ®s, + .regs = &input.regs, .addr_size = 8 * sizeof(void *), .sp_size = 8 * sizeof(void *), }; diff -Nru xen-4.9.0/tools/libxc/xc_cpuid_x86.c xen-4.9.2/tools/libxc/xc_cpuid_x86.c --- xen-4.9.0/tools/libxc/xc_cpuid_x86.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxc/xc_cpuid_x86.c 2018-03-28 13:10:55.000000000 +0000 @@ -465,7 +465,9 @@ case 0x80000008: regs[0] &= 0x0000ffffu; - regs[1] = regs[3] = 0; + regs[1] = info->featureset[featureword_of(X86_FEATURE_CLZERO)]; + /* regs[2] handled in the per-vendor logic. */ + regs[3] = 0; break; case 0x00000002: /* Intel cache info (dumped by AMD policy) */ diff -Nru xen-4.9.0/tools/libxc/xc_dom_arm.c xen-4.9.2/tools/libxc/xc_dom_arm.c --- xen-4.9.0/tools/libxc/xc_dom_arm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxc/xc_dom_arm.c 2018-03-28 13:10:55.000000000 +0000 @@ -223,6 +223,8 @@ domctl.domain = domid; domctl.cmd = XEN_DOMCTL_set_address_size; + domctl.u.address_size.size = 0; + for ( i = 0; i < ARRAY_SIZE(types); i++ ) if ( !strcmp(types[i].guest, guest_type) ) domctl.u.address_size.size = types[i].size; diff -Nru xen-4.9.0/tools/libxc/xc_sr_restore_x86_pv.c xen-4.9.2/tools/libxc/xc_sr_restore_x86_pv.c --- xen-4.9.0/tools/libxc/xc_sr_restore_x86_pv.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxc/xc_sr_restore_x86_pv.c 2018-03-28 13:10:55.000000000 +0000 @@ -455,8 +455,8 @@ domctl.cmd = XEN_DOMCTL_set_vcpu_msrs; domctl.domain = ctx->domid; domctl.u.vcpu_msrs.vcpu = vcpuid; - domctl.u.vcpu_msrs.msr_count = vcpu->msrsz % sizeof(xen_domctl_vcpu_msr_t); - set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer); + domctl.u.vcpu_msrs.msr_count = vcpu->msrsz / sizeof(xen_domctl_vcpu_msr_t); + set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer); memcpy(buffer, vcpu->msr, vcpu->msrsz); diff -Nru xen-4.9.0/tools/libxl/libxl_arm_acpi.c xen-4.9.2/tools/libxl/libxl_arm_acpi.c --- xen-4.9.0/tools/libxl/libxl_arm_acpi.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxl/libxl_arm_acpi.c 2018-03-28 13:10:55.000000000 +0000 @@ -37,7 +37,7 @@ #define BITS_PER_LONG 32 #endif #endif -#define ACPI_MACHINE_WIDTH __BITS_PER_LONG +#define ACPI_MACHINE_WIDTH BITS_PER_LONG #define COMPILER_DEPENDENT_INT64 int64_t #define COMPILER_DEPENDENT_UINT64 uint64_t diff -Nru xen-4.9.0/tools/libxl/libxl_cpuid.c xen-4.9.2/tools/libxl/libxl_cpuid.c --- xen-4.9.0/tools/libxl/libxl_cpuid.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxl/libxl_cpuid.c 2018-03-28 13:10:55.000000000 +0000 @@ -158,6 +158,8 @@ {"de", 0x00000001, NA, CPUID_REG_EDX, 2, 1}, {"vme", 0x00000001, NA, CPUID_REG_EDX, 1, 1}, {"fpu", 0x00000001, NA, CPUID_REG_EDX, 0, 1}, + {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, + {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, {"topoext", 0x80000001, NA, CPUID_REG_ECX, 22, 1}, {"tbm", 0x80000001, NA, CPUID_REG_ECX, 21, 1}, {"nodeid", 0x80000001, NA, CPUID_REG_ECX, 19, 1}, @@ -187,6 +189,7 @@ {"nx", 0x80000001, NA, CPUID_REG_EDX, 20, 1}, {"syscall", 0x80000001, NA, CPUID_REG_EDX, 11, 1}, {"procpkg", 0x00000004, 0, CPUID_REG_EAX, 26, 6}, + {"ibpb", 0x80000008, NA, CPUID_REG_EBX, 12, 1}, {"apicidsize", 0x80000008, NA, 
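/*
 * Each entry in this table is {flag name, leaf, subleaf, register,
 * first bit, bit width}; the new speculation-control entries above
 * follow the same scheme, e.g. {"ibpb", 0x80000008, NA, CPUID_REG_EBX,
 * 12, 1} exposes CPUID leaf 0x80000008, EBX bit 12 under the flag
 * name "ibpb".
 */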
CPUID_REG_ECX, 12, 4}, {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8}, {"svm_npt", 0x8000000a, NA, CPUID_REG_EDX, 0, 1}, diff -Nru xen-4.9.0/tools/libxl/libxl_create.c xen-4.9.2/tools/libxl/libxl_create.c --- xen-4.9.0/tools/libxl/libxl_create.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/libxl/libxl_create.c 2018-03-28 13:10:55.000000000 +0000 @@ -451,7 +451,7 @@ vments[4] = "start_time"; vments[5] = GCSPRINTF("%lu.%02d", start_time.tv_sec,(int)start_time.tv_usec/10000); - localents = libxl__calloc(gc, 9, sizeof(char *)); + localents = libxl__calloc(gc, 11, sizeof(char *)); i = 0; localents[i++] = "platform/acpi"; localents[i++] = libxl__acpi_defbool_val(info) ? "1" : "0"; diff -Nru xen-4.9.0/tools/misc/xen-cpuid.c xen-4.9.2/tools/misc/xen-cpuid.c --- xen-4.9.0/tools/misc/xen-cpuid.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/misc/xen-cpuid.c 2018-03-28 13:10:55.000000000 +0000 @@ -144,7 +144,11 @@ { [ 0] = "clzero", - [1 ... 31] = "REZ", + [1 ... 11] = "REZ", + + [12] = "ibpb", + + [13 ... 31] = "REZ", }; static const char *str_7d0[32] = @@ -153,7 +157,11 @@ [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", - [4 ... 31] = "REZ", + [4 ... 25] = "REZ", + + [26] = "ibrsb", [27] = "stibp", + + [28 ... 31] = "REZ", }; static struct { diff -Nru xen-4.9.0/tools/tests/x86_emulator/x86_emulate.c xen-4.9.2/tools/tests/x86_emulator/x86_emulate.c --- xen-4.9.0/tools/tests/x86_emulator/x86_emulate.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/tests/x86_emulator/x86_emulate.c 2018-03-28 13:10:55.000000000 +0000 @@ -3,7 +3,6 @@ #include #define cpu_has_amd_erratum(nr) 0 -#define mark_regs_dirty(r) ((void)(r)) #define cpu_has_mpx false #define read_bndcfgu() 0 #define xstate_set_init(what) diff -Nru xen-4.9.0/tools/xenstore/xenstored_domain.c xen-4.9.2/tools/xenstore/xenstored_domain.c --- xen-4.9.0/tools/xenstore/xenstored_domain.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/tools/xenstore/xenstored_domain.c 2018-03-28 13:10:55.000000000 +0000 @@ -221,10 +221,11 @@ static void domain_cleanup(void) { xc_dominfo_t dominfo; - struct domain *domain, *tmp; + struct domain *domain; int notify = 0; - list_for_each_entry_safe(domain, tmp, &domains, list) { + again: + list_for_each_entry(domain, &domains, list) { if (xc_domain_getinfo(*xc_handle, domain->domid, 1, &dominfo) == 1 && dominfo.domid == domain->domid) { @@ -236,8 +237,12 @@ if (!dominfo.dying) continue; } - talloc_free(domain->conn); - notify = 0; /* destroy_domain() fires the watch */ + if (domain->conn) { + talloc_unlink(talloc_autofree_context(), domain->conn); + domain->conn = NULL; + notify = 0; /* destroy_domain() fires the watch */ + goto again; + } } if (notify) diff -Nru xen-4.9.0/.travis.yml xen-4.9.2/.travis.yml --- xen-4.9.0/.travis.yml 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/.travis.yml 2018-03-28 13:10:55.000000000 +0000 @@ -71,6 +71,7 @@ - g++-5 - seabios - checkpolicy + - ghostscript # we must set CXX manually instead of using 'language: cpp' due to # travis-ci/travis-ci#3871 before_script: diff -Nru xen-4.9.0/xen/arch/arm/arm32/entry.S xen-4.9.2/xen/arch/arm/arm32/entry.S --- xen-4.9.0/xen/arch/arm/arm32/entry.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm32/entry.S 2018-03-28 13:10:55.000000000 +0000 @@ -34,6 +34,20 @@ blne save_guest_regs save_guest_regs: +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR + /* + * Restore vectors table to the default as it may have been + * changed when returning to the guest (see + * return_to_hypervisor). 
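
   (The mcr instruction below, p15, 4, r1, c12, c0, 0, is a write to
   HVBAR, the hypervisor vector base address register; loading
   hyp_traps_vector into it switches exception entry back to the
   default table.)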
We need to do that early (e.g before + * any interrupts are unmasked) because hardened vectors requires + * SP to be 8 bytes aligned. This does not hold when running in + * the hypervisor. + */ + ldr r1, =hyp_traps_vector + mcr p15, 4, r1, c12, c0, 0 + isb +#endif + ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ str r11, [sp, #UREGS_sp] SAVE_ONE_BANKED(SP_usr) @@ -111,43 +125,37 @@ skip_check: mov pc, lr -#define DEFINE_TRAP_ENTRY(trap) \ +/* + * Macro to define trap entry. The iflags corresponds to the list of + * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. + */ +#define __DEFINE_TRAP_ENTRY(trap, iflags) \ ALIGN; \ trap_##trap: \ SAVE_ALL; \ - cpsie i; /* local_irq_enable */ \ - cpsie a; /* asynchronous abort enable */ \ + cpsie iflags; \ adr lr, return_from_trap; \ mov r0, sp; \ + /* \ + * Save the stack pointer in r11. It will be restored after the \ + * trap has been handled (see return_from_trap). \ + */ \ mov r11, sp; \ bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ b do_trap_##trap -#define DEFINE_TRAP_ENTRY_NOIRQ(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - cpsie a; /* asynchronous abort enable */ \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap +/* Trap handler which unmask IRQ/Abort, keep FIQ masked */ +#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai) -#define DEFINE_TRAP_ENTRY_NOABORT(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - cpsie i; /* local_irq_enable */ \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap +/* Trap handler which unmask Abort, keep IRQ/FIQ masked */ +#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a) + +/* Trap handler which unmask IRQ, keep Abort/FIQ masked */ +#define DEFINE_TRAP_ENTRY_NOABORT(trap) __DEFINE_TRAP_ENTRY(trap, i) .align 5 GLOBAL(hyp_traps_vector) - .word 0 /* 0x00 - Reset */ + b trap_reset /* 0x00 - Reset */ b trap_undefined_instruction /* 0x04 - Undefined Instruction */ b trap_hypervisor_call /* 0x08 - Hypervisor Call */ b trap_prefetch_abort /* 0x0c - Prefetch Abort */ @@ -156,6 +164,66 @@ b trap_irq /* 0x18 - IRQ */ b trap_fiq /* 0x1c - FIQ */ +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + + .align 5 +GLOBAL(hyp_traps_vector_ic_inv) + /* + * We encode the exception entry in the bottom 3 bits of + * SP, and we have to guarantee to be 8 bytes aligned. + */ + add sp, sp, #1 /* Reset 7 */ + add sp, sp, #1 /* Undef 6 */ + add sp, sp, #1 /* Hypervisor call 5 */ + add sp, sp, #1 /* Prefetch abort 4 */ + add sp, sp, #1 /* Data abort 3 */ + add sp, sp, #1 /* Hypervisor 2 */ + add sp, sp, #1 /* IRQ 1 */ + nop /* FIQ 0 */ + + mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ + isb + + b decode_vectors + + .align 5 +GLOBAL(hyp_traps_vector_bp_inv) + /* + * We encode the exception entry in the bottom 3 bits of + * SP, and we have to guarantee to be 8 bytes aligned. 
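
   Concretely: the eight one-instruction slots below fall through into
   one another, each adding 1 to SP, so an exception entering through,
   say, the Hypervisor Call slot leaves SP mod 8 == 5 by the time
   decode_vectors runs; the vect_br macro then strips the encoded value
   again with eor/tst/eorne before branching to the real handler.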
+ */ + add sp, sp, #1 /* Reset 7 */ + add sp, sp, #1 /* Undef 6 */ + add sp, sp, #1 /* Hypervisor Call 5 */ + add sp, sp, #1 /* Prefetch abort 4 */ + add sp, sp, #1 /* Data abort 3 */ + add sp, sp, #1 /* Hypervisor 2 */ + add sp, sp, #1 /* IRQ 1 */ + nop /* FIQ 0 */ + + mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ + isb + +decode_vectors: +.macro vect_br val, targ + eor sp, sp, #\val + tst sp, #7 + eorne sp, sp, #\val + beq \targ +.endm + + vect_br 0, trap_fiq + vect_br 1, trap_irq + vect_br 2, trap_guest_sync + vect_br 3, trap_data_abort + vect_br 4, trap_prefetch_abort + vect_br 5, trap_hypervisor_call + vect_br 6, trap_undefined_instruction + vect_br 7, trap_reset + +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + +DEFINE_TRAP_ENTRY(reset) DEFINE_TRAP_ENTRY(undefined_instruction) DEFINE_TRAP_ENTRY(hypervisor_call) DEFINE_TRAP_ENTRY(prefetch_abort) @@ -165,6 +233,10 @@ DEFINE_TRAP_ENTRY_NOABORT(data_abort) return_from_trap: + /* + * Restore the stack pointer from r11. It was saved on exception + * entry (see __DEFINE_TRAP_ENTRY). + */ mov sp, r11 ENTRY(return_to_new_vcpu32) ldr r11, [sp, #UREGS_cpsr] @@ -188,12 +260,37 @@ RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); /* Fall thru */ return_to_hypervisor: - cpsid i + cpsid ai ldr lr, [sp, #UREGS_lr] ldr r11, [sp, #UREGS_pc] msr ELR_hyp, r11 ldr r11, [sp, #UREGS_cpsr] msr SPSR_hyp, r11 +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR + /* + * Hardening branch predictor may require to setup a different + * vector tables before returning to the guests. Those vectors + * may rely on the state of registers that does not hold when + * running in the hypervisor (e.g SP is 8 bytes aligned). So setup + * HVBAR very late. + * + * Default vectors table will be restored on exit (see + * save_guest_regs). + */ + mov r9, #0 /* vector tables = NULL */ + /* + * Load vector tables pointer from the per-cpu bp_harden_vecs + * when returning to the guest only. + */ + and r11, #PSR_MODE_MASK + cmp r11, #PSR_MODE_HYP + ldrne r11, =per_cpu__bp_harden_vecs + mrcne p15, 4, r10, c13, c0, 2 /* r10 = per-cpu offset (HTPIDR) */ + addne r11, r11, r10 /* r11 = offset of the vector tables */ + ldrne r9, [r11] /* r9 = vector tables */ + cmp r9, #0 /* Only update HVBAR when the vector */ + mcrne p15, 4, r9, c12, c0, 0 /* tables is not NULL. */ +#endif pop {r0-r12} add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ clrex diff -Nru xen-4.9.0/xen/arch/arm/arm32/head.S xen-4.9.2/xen/arch/arm/arm32/head.S --- xen-4.9.0/xen/arch/arm/arm32/head.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm32/head.S 2018-03-28 13:10:55.000000000 +0000 @@ -348,6 +348,13 @@ 1: PRINT("- Turning on paging -\r\n") + /* + * The state of the TLBs is unknown before turning on the MMU. + * Flush them to avoid stale one. 
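
   (The mcr below, CP32(r0, TLBIALLH), invalidates all hypervisor TLB
   entries; the dsb nsh that follows ensures the invalidation has
   completed before the SCTLR write that turns the MMU on.)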
+ */ + mcr CP32(r0, TLBIALLH) /* Flush hypervisor TLBs */ + dsb nsh + ldr r1, =paging /* Explicit vaddr, not RIP-relative */ mrc CP32(r0, HSCTLR) orr r0, r0, #(SCTLR_M|SCTLR_C) /* Enable MMU and D-cache */ diff -Nru xen-4.9.0/xen/arch/arm/arm32/livepatch.c xen-4.9.2/xen/arch/arm/arm32/livepatch.c --- xen-4.9.0/xen/arch/arm/arm32/livepatch.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm32/livepatch.c 2018-03-28 13:10:55.000000000 +0000 @@ -224,21 +224,21 @@ const struct livepatch_elf_sec *rela, bool use_rela) { - const Elf_RelA *r_a; - const Elf_Rel *r; - unsigned int symndx, i; - uint32_t val; - void *dest; + unsigned int i; int rc = 0; for ( i = 0; i < (rela->sec->sh_size / rela->sec->sh_entsize); i++ ) { + unsigned int symndx; + uint32_t val; + void *dest; unsigned char type; - s32 addend = 0; + s32 addend; if ( use_rela ) { - r_a = rela->data + i * rela->sec->sh_entsize; + const Elf_RelA *r_a = rela->data + i * rela->sec->sh_entsize; + symndx = ELF32_R_SYM(r_a->r_info); type = ELF32_R_TYPE(r_a->r_info); dest = base->load_addr + r_a->r_offset; /* P */ @@ -246,26 +246,38 @@ } else { - r = rela->data + i * rela->sec->sh_entsize; + const Elf_Rel *r = rela->data + i * rela->sec->sh_entsize; + symndx = ELF32_R_SYM(r->r_info); type = ELF32_R_TYPE(r->r_info); dest = base->load_addr + r->r_offset; /* P */ + addend = get_addend(type, dest); } - if ( symndx > elf->nsym ) + if ( symndx == STN_UNDEF ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: Encountered STN_UNDEF\n", + elf->name); + return -EOPNOTSUPP; + } + else if ( symndx >= elf->nsym ) { dprintk(XENLOG_ERR, LIVEPATCH "%s: Relative symbol wants symbol@%u which is past end!\n", elf->name, symndx); return -EINVAL; } - - if ( !use_rela ) - addend = get_addend(type, dest); + else if ( !elf->sym[symndx].sym ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: No relative symbol@%u\n", + elf->name, symndx); + return -EINVAL; + } val = elf->sym[symndx].sym->st_value; /* S */ rc = perform_rel(type, dest, val, addend); - switch ( rc ) { + switch ( rc ) + { case -EOVERFLOW: dprintk(XENLOG_ERR, LIVEPATCH "%s: Overflow in relocation %u in %s for %s!\n", elf->name, i, rela->name, base->name); @@ -275,9 +287,6 @@ dprintk(XENLOG_ERR, LIVEPATCH "%s: Unhandled relocation #%x\n", elf->name, type); break; - - default: - break; } if ( rc ) diff -Nru xen-4.9.0/xen/arch/arm/arm32/traps.c xen-4.9.2/xen/arch/arm/arm32/traps.c --- xen-4.9.0/xen/arch/arm/arm32/traps.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm32/traps.c 2018-03-28 13:10:55.000000000 +0000 @@ -23,6 +23,11 @@ #include +asmlinkage void do_trap_reset(struct cpu_user_regs *regs) +{ + do_unexpected_trap("Reset", regs); +} + asmlinkage void do_trap_undefined_instruction(struct cpu_user_regs *regs) { uint32_t pc = regs->pc; diff -Nru xen-4.9.0/xen/arch/arm/arm64/bpi.S xen-4.9.2/xen/arch/arm/arm64/bpi.S --- xen-4.9.0/xen/arch/arm/arm64/bpi.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm64/bpi.S 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,89 @@ +/* + * Contains CPU specific branch predictor invalidation sequences + * + * Copyright (C) 2018 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +.macro ventry target + .rept 31 + nop + .endr + b \target +.endm + +.macro vectors target + ventry \target + 0x000 + ventry \target + 0x080 + ventry \target + 0x100 + ventry \target + 0x180 + + ventry \target + 0x200 + ventry \target + 0x280 + ventry \target + 0x300 + ventry \target + 0x380 + + ventry \target + 0x400 + ventry \target + 0x480 + ventry \target + 0x500 + ventry \target + 0x580 + + ventry \target + 0x600 + ventry \target + 0x680 + ventry \target + 0x700 + ventry \target + 0x780 +.endm + +/* + * Populate 4 vector tables. This will cover up to 4 different + * micro-architectures in a system. + */ + .align 11 +ENTRY(__bp_harden_hyp_vecs_start) + .rept 4 + vectors hyp_traps_vector + .endr +ENTRY(__bp_harden_hyp_vecs_end) + +ENTRY(__psci_hyp_bp_inval_start) + sub sp, sp, #(8 * 18) + stp x16, x17, [sp, #(16 * 0)] + stp x14, x15, [sp, #(16 * 1)] + stp x12, x13, [sp, #(16 * 2)] + stp x10, x11, [sp, #(16 * 3)] + stp x8, x9, [sp, #(16 * 4)] + stp x6, x7, [sp, #(16 * 5)] + stp x4, x5, [sp, #(16 * 6)] + stp x2, x3, [sp, #(16 * 7)] + stp x0, x1, [sp, #(16 * 8)] + mov x0, #0x84000000 + smc #0 + ldp x16, x17, [sp, #(16 * 0)] + ldp x14, x15, [sp, #(16 * 1)] + ldp x12, x13, [sp, #(16 * 2)] + ldp x10, x11, [sp, #(16 * 3)] + ldp x8, x9, [sp, #(16 * 4)] + ldp x6, x7, [sp, #(16 * 5)] + ldp x4, x5, [sp, #(16 * 6)] + ldp x2, x3, [sp, #(16 * 7)] + ldp x0, x1, [sp, #(16 * 8)] + add sp, sp, #(8 * 18) +ENTRY(__psci_hyp_bp_inval_end) + +/* + * Local variables: + * mode: ASM + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/xen/arch/arm/arm64/head.S xen-4.9.2/xen/arch/arm/arm64/head.S --- xen-4.9.0/xen/arch/arm/arm64/head.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm64/head.S 2018-03-28 13:10:55.000000000 +0000 @@ -509,6 +509,13 @@ 1: PRINT("- Turning on paging -\r\n") + /* + * The state of the TLBs is unknown before turning on the MMU. + * Flush them to avoid stale one. 
+ */ + tlbi alle2 /* Flush hypervisor TLBs */ + dsb nsh + ldr x1, =paging /* Explicit vaddr, not RIP-relative */ mrs x0, SCTLR_EL2 orr x0, x0, #SCTLR_M /* Enable MMU */ diff -Nru xen-4.9.0/xen/arch/arm/arm64/livepatch.c xen-4.9.2/xen/arch/arm/arm64/livepatch.c --- xen-4.9.0/xen/arch/arm/arm64/livepatch.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm64/livepatch.c 2018-03-28 13:10:55.000000000 +0000 @@ -241,32 +241,38 @@ const struct livepatch_elf_sec *base, const struct livepatch_elf_sec *rela) { - const Elf_RelA *r; - unsigned int symndx, i; - uint64_t val; - void *dest; - bool_t overflow_check; + unsigned int i; for ( i = 0; i < (rela->sec->sh_size / rela->sec->sh_entsize); i++ ) { + const Elf_RelA *r = rela->data + i * rela->sec->sh_entsize; + unsigned int symndx = ELF64_R_SYM(r->r_info); + void *dest = base->load_addr + r->r_offset; /* P */ + bool overflow_check = true; int ovf = 0; + uint64_t val; - r = rela->data + i * rela->sec->sh_entsize; - - symndx = ELF64_R_SYM(r->r_info); - - if ( symndx > elf->nsym ) + if ( symndx == STN_UNDEF ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: Encountered STN_UNDEF\n", + elf->name); + return -EOPNOTSUPP; + } + else if ( symndx >= elf->nsym ) { dprintk(XENLOG_ERR, LIVEPATCH "%s: Relative relocation wants symbol@%u which is past end!\n", elf->name, symndx); return -EINVAL; } + else if ( !elf->sym[symndx].sym ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: No relative symbol@%u\n", + elf->name, symndx); + return -EINVAL; + } - dest = base->load_addr + r->r_offset; /* P */ val = elf->sym[symndx].sym->st_value + r->r_addend; /* S+A */ - overflow_check = true; - /* ARM64 operations at minimum are always 32-bit. */ if ( r->r_offset >= base->sec->sh_size || (r->r_offset + sizeof(uint32_t)) > base->sec->sh_size ) @@ -403,6 +409,7 @@ case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; + /* Fallthrough. */ case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, dest, val, 12, 21, AARCH64_INSN_IMM_ADR); diff -Nru xen-4.9.0/xen/arch/arm/arm64/Makefile xen-4.9.2/xen/arch/arm/arm64/Makefile --- xen-4.9.0/xen/arch/arm/arm64/Makefile 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/arm64/Makefile 2018-03-28 13:10:55.000000000 +0000 @@ -1,6 +1,7 @@ subdir-y += lib obj-y += cache.o +obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o obj-$(EARLY_PRINTK) += debug.o obj-y += domctl.o obj-y += domain.o diff -Nru xen-4.9.0/xen/arch/arm/bootfdt.c xen-4.9.2/xen/arch/arm/bootfdt.c --- xen-4.9.0/xen/arch/arm/bootfdt.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/bootfdt.c 2018-03-28 13:10:55.000000000 +0000 @@ -109,8 +109,8 @@ continue; } - as = depth > 0 ? address_cells[depth-1] : 0; - ss = depth > 0 ? size_cells[depth-1] : 0; + as = depth > 0 ? address_cells[depth-1] : DT_ROOT_NODE_ADDR_CELLS_DEFAULT; + ss = depth > 0 ? 
size_cells[depth-1] : DT_ROOT_NODE_SIZE_CELLS_DEFAULT; address_cells[depth] = device_tree_get_u32(fdt, node, "#address-cells", as); diff -Nru xen-4.9.0/xen/arch/arm/cpuerrata.c xen-4.9.2/xen/arch/arm/cpuerrata.c --- xen-4.9.0/xen/arch/arm/cpuerrata.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/cpuerrata.c 2018-03-28 13:10:55.000000000 +0000 @@ -1,5 +1,215 @@ +#include +#include +#include +#include +#include +#include +#include #include #include +#include + +/* Hardening Branch predictor code for Arm64 */ +#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR + +#define VECTOR_TABLE_SIZE SZ_2K + +/* + * Number of available table vectors (this should be in-sync with + * arch/arm64/bpi.S + */ +#define NR_BPI_HYP_VECS 4 + +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; + +/* + * Key for each slot. This is used to find whether a specific workaround + * had a slot assigned. + * + * The key is virtual address of the vector workaround + */ +static uintptr_t bp_harden_slot_key[NR_BPI_HYP_VECS]; + +/* + * [hyp_vec_start, hyp_vec_end[ corresponds to the first 31 instructions + * of each vector. The last (i.e 32th) instruction is used to branch to + * the original entry. + * + * Those instructions will be copied on each vector to harden them. + */ +static bool copy_hyp_vect_bpi(unsigned int slot, const char *hyp_vec_start, + const char *hyp_vec_end) +{ + void *dst_remapped; + const void *dst = __bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE; + unsigned int i; + mfn_t dst_mfn = _mfn(virt_to_mfn(dst)); + + BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31); + + /* + * Vectors are part of the text that are mapped read-only. So re-map + * the vector table to be able to update vectors. + */ + dst_remapped = __vmap(&dst_mfn, + 1UL << get_order_from_bytes(VECTOR_TABLE_SIZE), + 1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT); + if ( !dst_remapped ) + return false; + + dst_remapped += (vaddr_t)dst & ~PAGE_MASK; + + for ( i = 0; i < VECTOR_TABLE_SIZE; i += 0x80 ) + { + memcpy(dst_remapped + i, hyp_vec_start, hyp_vec_end - hyp_vec_start); + } + + clean_dcache_va_range(dst_remapped, VECTOR_TABLE_SIZE); + invalidate_icache(); + + vunmap(dst_remapped); + + return true; +} + +static bool __maybe_unused +install_bp_hardening_vec(const struct arm_cpu_capabilities *entry, + const char *hyp_vec_start, + const char *hyp_vec_end) +{ + static int last_slot = -1; + static DEFINE_SPINLOCK(bp_lock); + unsigned int i, slot = -1; + bool ret = true; + + /* + * Enable callbacks are called on every CPU based on the + * capabilities. So double-check whether the CPU matches the + * entry. + */ + if ( !entry->matches(entry) ) + return true; + + /* + * No need to install hardened vector when the processor has + * ID_AA64PRF0_EL1.CSV2 set. + */ + if ( cpu_data[smp_processor_id()].pfr64.csv2 ) + return true; + + spin_lock(&bp_lock); + + /* + * Look up whether the hardening vector had a slot already + * assigned. + */ + for ( i = 0; i < 4; i++ ) + { + if ( bp_harden_slot_key[i] == (uintptr_t)hyp_vec_start ) + { + slot = i; + break; + } + } + + if ( slot == -1 ) + { + last_slot++; + /* Check we don't overrun the number of slots available. */ + BUG_ON(NR_BPI_HYP_VECS <= last_slot); + + slot = last_slot; + ret = copy_hyp_vect_bpi(slot, hyp_vec_start, hyp_vec_end); + + /* Only update the slot if the copy succeeded. */ + if ( ret ) + bp_harden_slot_key[slot] = (uintptr_t)hyp_vec_start; + } + + if ( ret ) + { + /* Install the new vector table. 
*/ + WRITE_SYSREG((vaddr_t)(__bp_harden_hyp_vecs_start + slot * VECTOR_TABLE_SIZE), + VBAR_EL2); + isb(); + } + + spin_unlock(&bp_lock); + + return ret; +} + +extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; + +static int enable_psci_bp_hardening(void *data) +{ + bool ret = true; + static bool warned = false; + + /* + * The mitigation uses the PSCI VERSION function to invalidate the + * branch predictor. This function is only available with PSCI 0.2 + * and later. + */ + if ( psci_ver >= PSCI_VERSION(0, 2) ) + ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start, + __psci_hyp_bp_inval_end); + else if ( !warned ) + { + ASSERT(system_state < SYS_STATE_active); + warning_add("PSCI 0.2 or later is required for the branch predictor hardening.\n"); + warned = true; + } + + return !ret; +} + +#endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */ + +/* Hardening Branch predictor code for Arm32 */ +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR + +/* + * Per-CPU vector tables to use when returning to the guests. They will + * only be used on platforms that require branch predictor hardening. + */ +DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs); + +extern char hyp_traps_vector_bp_inv[]; +extern char hyp_traps_vector_ic_inv[]; + +static void __maybe_unused +install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry, + const char *hyp_vecs, const char *desc) +{ + /* + * Enable callbacks are called on every CPU based on the + * capabilities. So double-check whether the CPU matches the + * entry. + */ + if ( !entry->matches(entry) ) + return; + + printk(XENLOG_INFO "CPU%u will %s on guest exit\n", + smp_processor_id(), desc); + this_cpu(bp_harden_vecs) = hyp_vecs; +} + +static int enable_bp_inv_hardening(void *data) +{ + install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv, + "execute BPIALL"); + return 0; +} + +static int enable_ic_inv_hardening(void *data) +{ + install_bp_hardening_vecs(data, hyp_traps_vector_ic_inv, + "execute ICIALLU"); + return 0; +} + +#endif #define MIDR_RANGE(model, min, max) \ .matches = is_affected_midr_range, \ @@ -7,10 +217,16 @@ .midr_range_min = min, \ .midr_range_max = max +#define MIDR_ALL_VERSIONS(model) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = 0, \ + .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) + static bool_t __maybe_unused is_affected_midr_range(const struct arm_cpu_capabilities *entry) { - return MIDR_IS_CPU_MODEL_RANGE(boot_cpu_data.midr.bits, entry->midr_model, + return MIDR_IS_CPU_MODEL_RANGE(current_cpu_data.midr.bits, entry->midr_model, entry->midr_range_min, entry->midr_range_max); } @@ -57,6 +273,45 @@ (1 << MIDR_VARIANT_SHIFT) | 2), }, #endif +#ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + .enable = enable_psci_bp_hardening, + }, +#endif +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A12), + .enable = enable_bp_inv_hardening, + }, + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + 
MIDR_ALL_VERSIONS(MIDR_CORTEX_A17), + .enable = enable_bp_inv_hardening, + }, + { + .capability = ARM_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A15), + .enable = enable_ic_inv_hardening, + }, +#endif {}, }; @@ -64,6 +319,12 @@ { update_cpu_capabilities(arm_errata, "enabled workaround for"); } + +void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm_errata); +} + /* * Local variables: * mode: C diff -Nru xen-4.9.0/xen/arch/arm/cpufeature.c xen-4.9.2/xen/arch/arm/cpufeature.c --- xen-4.9.0/xen/arch/arm/cpufeature.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/cpufeature.c 2018-03-28 13:10:55.000000000 +0000 @@ -19,6 +19,7 @@ #include #include #include +#include #include DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS); @@ -39,6 +40,34 @@ } } +/* + * Run through the enabled capabilities and call enable() for each one + * on all active CPUs. + */ +void __init enable_cpu_capabilities(const struct arm_cpu_capabilities *caps) +{ + for ( ; caps->matches; caps++ ) + { + if ( !cpus_have_cap(caps->capability) ) + continue; + + if ( caps->enable ) + { + int ret; + + /* + * Use stop_machine_run() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which uses + * an IPI, giving us a PSTATE that disappears when we + * return. + */ + ret = stop_machine_run(caps->enable, (void *)caps, NR_CPUS); + /* stop_machine_run() should never fail at this stage of the boot. */ + BUG_ON(ret); + } + } +} + /* * Local variables: * mode: C diff -Nru xen-4.9.0/xen/arch/arm/domain.c xen-4.9.2/xen/arch/arm/domain.c --- xen-4.9.0/xen/arch/arm/domain.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/domain.c 2018-03-28 13:10:55.000000000 +0000 @@ -461,19 +461,37 @@ struct domain *alloc_domain_struct(void) { struct domain *d; + unsigned int i, max_status_frames; + BUILD_BUG_ON(sizeof(*d) > PAGE_SIZE); d = alloc_xenheap_pages(0, 0); if ( d == NULL ) return NULL; clear_page(d); - d->arch.grant_table_gfn = xzalloc_array(gfn_t, max_grant_frames); + + d->arch.grant_shared_gfn = xmalloc_array(gfn_t, max_grant_frames); + max_status_frames = grant_to_status_frames(max_grant_frames); + d->arch.grant_status_gfn = xmalloc_array(gfn_t, max_status_frames); + if ( !d->arch.grant_shared_gfn || !d->arch.grant_status_gfn ) + { + free_domain_struct(d); + return NULL; + } + + for ( i = 0; i < max_grant_frames; ++i ) + d->arch.grant_shared_gfn[i] = INVALID_GFN; + + for ( i = 0; i < max_status_frames; ++i ) + d->arch.grant_status_gfn[i] = INVALID_GFN; + return d; } void free_domain_struct(struct domain *d) { - xfree(d->arch.grant_table_gfn); + xfree(d->arch.grant_shared_gfn); + xfree(d->arch.grant_status_gfn); free_xenheap_page(d); } diff -Nru xen-4.9.0/xen/arch/arm/efi/efi-boot.h xen-4.9.2/xen/arch/arm/efi/efi-boot.h --- xen-4.9.0/xen/arch/arm/efi/efi-boot.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/efi/efi-boot.h 2018-03-28 13:10:55.000000000 +0000 @@ -596,7 +596,7 @@ { } -static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) +static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { __flush_dcache_area(vaddr, size); } diff -Nru xen-4.9.0/xen/arch/arm/gic-v3.c xen-4.9.2/xen/arch/arm/gic-v3.c --- xen-4.9.0/xen/arch/arm/gic-v3.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/gic-v3.c 2018-03-28 13:10:55.000000000 +0000 @@ -569,6 +569,13 @@ for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i += 32 ) writel_relaxed(0xffffffff, GICD + GICD_ICENABLER + (i / 32) * 4); + /* + * Configure SPIs as non-secure Group-1.
This will only matter + * if the GIC has only a single security state. + */ + for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i += 32 ) + writel_relaxed(GENMASK(31, 0), GICD + GICD_IGROUPR + (i / 32) * 4); + gicv3_dist_wait_for_rwp(); /* Turn on the distributor */ @@ -757,6 +764,8 @@ */ writel_relaxed(0xffff0000, GICD_RDIST_SGI_BASE + GICR_ICENABLER0); writel_relaxed(0x0000ffff, GICD_RDIST_SGI_BASE + GICR_ISENABLER0); + /* Configure SGIs/PPIs as non-secure Group-1 */ + writel_relaxed(GENMASK(31, 0), GICD_RDIST_SGI_BASE + GICR_IGROUPR0); gicv3_redist_wait_for_rwp(); @@ -811,8 +820,12 @@ spin_lock(&gicv3.lock); res = gicv3_cpu_init(); + if ( res ) + goto out; + gicv3_hyp_init(); +out: spin_unlock(&gicv3.lock); return res; @@ -1637,8 +1650,12 @@ panic("GICv3: ITS: initialization failed: %d\n", res); res = gicv3_cpu_init(); + if ( res ) + goto out; + gicv3_hyp_init(); +out: spin_unlock(&gicv3.lock); return res; diff -Nru xen-4.9.0/xen/arch/arm/io.c xen-4.9.2/xen/arch/arm/io.c --- xen-4.9.0/xen/arch/arm/io.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/io.c 2018-03-28 13:10:55.000000000 +0000 @@ -79,7 +79,7 @@ if ( handler0->addr < handler1->addr ) return -1; - if ( handler0->addr > (handler1->addr + handler1->size) ) + if ( handler0->addr >= (handler1->addr + handler1->size) ) return 1; return 0; diff -Nru xen-4.9.0/xen/arch/arm/Kconfig xen-4.9.2/xen/arch/arm/Kconfig --- xen-4.9.0/xen/arch/arm/Kconfig 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/Kconfig 2018-03-28 13:10:55.000000000 +0000 @@ -164,6 +164,29 @@ endmenu +config HARDEN_BRANCH_PREDICTOR + bool "Harden the branch predictor against aliasing attacks" if EXPERT + default y + help + Speculation attacks against some high-performance processors rely on + being able to manipulate the branch predictor for a victim context by + executing aliasing branches in the attacker context. Such attacks + can be partially mitigated by clearing internal branch + predictor state and limiting the prediction logic in some situations. + + This config option will take CPU-specific actions to harden the + branch predictor against aliasing attacks and may rely on specific + instruction sequences or control bits being set by the system + firmware. + + If unsure, say Y.
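As a standalone illustration of the slot arithmetic behind copy_hyp_vect_bpi() and install_bp_hardening_vec() above -- a minimal sketch with invented names, not Xen code -- each of the NR_BPI_HYP_VECS slots is a 2KiB AArch64 vector table whose 16 entries lie at 0x80-byte strides, so every entry has room for 31 four-byte workaround instructions plus one branch back to the original vector:

    #include <stdio.h>
    #include <stddef.h>

    #define VECTOR_TABLE_SIZE 2048    /* SZ_2K, one hardened table per slot */
    #define VECTOR_ENTRY_SIZE 0x80    /* AArch64 vector entry stride */
    #define NR_BPI_HYP_VECS   4       /* mirrors the constant in the patch */

    /* Byte offset of vector entry 'entry' within hardened table 'slot',
     * relative to the start of the hardened vectors region. */
    static size_t bpi_vector_offset(unsigned int slot, unsigned int entry)
    {
        return (size_t)slot * VECTOR_TABLE_SIZE + entry * VECTOR_ENTRY_SIZE;
    }

    int main(void)
    {
        unsigned int slot;

        /* 31 instructions of workaround; the 32nd word branches back. */
        printf("workaround capacity per entry: %d insns\n",
               VECTOR_ENTRY_SIZE / 4 - 1);

        for ( slot = 0; slot < NR_BPI_HYP_VECS; slot++ )
            printf("slot %u: offsets %#zx..%#zx\n", slot,
                   bpi_vector_offset(slot, 0), bpi_vector_offset(slot, 15));
        return 0;
    }

This matches the BUG_ON(((hyp_vec_end - hyp_vec_start) / 4) > 31) check and the 0x80-byte copy stride in the hunk above.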
+ +config ARM64_HARDEN_BRANCH_PREDICTOR + def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR + +config ARM32_HARDEN_BRANCH_PREDICTOR + def_bool y if ARM_32 && HARDEN_BRANCH_PREDICTOR + source "common/Kconfig" source "drivers/Kconfig" diff -Nru xen-4.9.0/xen/arch/arm/mm.c xen-4.9.2/xen/arch/arm/mm.c --- xen-4.9.0/xen/arch/arm/mm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/mm.c 2018-03-28 13:10:55.000000000 +0000 @@ -1148,6 +1148,7 @@ int rc; p2m_type_t t; struct page_info *page = NULL; + bool status = false; switch ( space ) { @@ -1164,7 +1165,8 @@ if ( idx < nr_status_frames(d->grant_table) ) mfn = virt_to_mfn(d->grant_table->status[idx]); else - return -EINVAL; + mfn = mfn_x(INVALID_MFN); + status = true; } else { @@ -1175,14 +1177,34 @@ if ( idx < nr_grant_frames(d->grant_table) ) mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); else - return -EINVAL; + mfn = mfn_x(INVALID_MFN); } - d->arch.grant_table_gfn[idx] = gfn; + if ( mfn != mfn_x(INVALID_MFN) && + !gfn_eq(gnttab_get_frame_gfn(d, status, idx), INVALID_GFN) ) + { + rc = guest_physmap_remove_page(d, + gnttab_get_frame_gfn(d, status, idx), + _mfn(mfn), 0); + if ( rc ) + { + grant_write_unlock(d->grant_table); + return rc; + } + } - t = p2m_ram_rw; + if ( mfn != mfn_x(INVALID_MFN) ) + { + gnttab_set_frame_gfn(d, status, idx, gfn); + + t = p2m_ram_rw; + } grant_write_unlock(d->grant_table); + + if ( mfn == mfn_x(INVALID_MFN) ) + return -EINVAL; + break; case XENMAPSPACE_shared_info: if ( idx != 0 ) diff -Nru xen-4.9.0/xen/arch/arm/p2m.c xen-4.9.2/xen/arch/arm/p2m.c --- xen-4.9.0/xen/arch/arm/p2m.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/p2m.c 2018-03-28 13:10:55.000000000 +0000 @@ -1239,6 +1239,10 @@ struct p2m_domain *p2m = &d->arch.p2m; struct page_info *pg; + /* p2m not actually initialized */ + if ( !p2m->domain ) + return; + while ( (pg = page_list_remove_head(&p2m->pages)) ) free_domheap_page(pg); @@ -1250,6 +1254,8 @@ p2m_free_vmid(d); radix_tree_destroy(&p2m->mem_access_settings, NULL); + + p2m->domain = NULL; } int p2m_init(struct domain *d) @@ -1267,7 +1273,6 @@ if ( rc != 0 ) return rc; - p2m->domain = d; p2m->max_mapped_gfn = _gfn(0); p2m->lowest_mapped_gfn = _gfn(ULONG_MAX); @@ -1296,6 +1301,13 @@ for_each_possible_cpu(cpu) p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID; + /* + * Besides getting a domain when we only have the p2m in hand, + * the back pointer to domain is also used in p2m_teardown() + * as an end-of-initialization indicator. + */ + p2m->domain = d; + return rc; } @@ -1312,13 +1324,13 @@ p2m_type_t t; int rc = 0; unsigned int order; - - /* Convenience alias */ - gfn_t start = p2m->lowest_mapped_gfn; - gfn_t end = p2m->max_mapped_gfn; + gfn_t start, end; p2m_write_lock(p2m); + start = p2m->lowest_mapped_gfn; + end = p2m->max_mapped_gfn; + for ( ; gfn_x(start) < gfn_x(end); start = gfn_next_boundary(start, order) ) { @@ -1373,9 +1385,6 @@ p2m_type_t t; unsigned int order; - start = gfn_max(start, p2m->lowest_mapped_gfn); - end = gfn_min(end, p2m->max_mapped_gfn); - /* * The operation cache flush will invalidate the RAM assigned to the * guest in a given range. 
It will not modify the page table and @@ -1384,6 +1393,9 @@ */ p2m_read_lock(p2m); + start = gfn_max(start, p2m->lowest_mapped_gfn); + end = gfn_min(end, p2m->max_mapped_gfn); + for ( ; gfn_x(start) < gfn_x(end); start = next_gfn ) { mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order); diff -Nru xen-4.9.0/xen/arch/arm/setup.c xen-4.9.2/xen/arch/arm/setup.c --- xen-4.9.0/xen/arch/arm/setup.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/setup.c 2018-03-28 13:10:55.000000000 +0000 @@ -847,6 +847,7 @@ * stop_machine (tasklets initialized via an initcall). */ apply_alternatives_all(); + enable_errata_workarounds(); /* Create initial domain 0. */ /* The vGIC for DOM0 is exactly emulating the hardware GIC */ diff -Nru xen-4.9.0/xen/arch/arm/smp.c xen-4.9.2/xen/arch/arm/smp.c --- xen-4.9.0/xen/arch/arm/smp.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/smp.c 2018-03-28 13:10:55.000000000 +0000 @@ -1,3 +1,4 @@ +#include #include #include #include diff -Nru xen-4.9.0/xen/arch/arm/traps.c xen-4.9.2/xen/arch/arm/traps.c --- xen-4.9.0/xen/arch/arm/traps.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/traps.c 2018-03-28 13:10:55.000000000 +0000 @@ -157,7 +157,10 @@ void init_traps(void) { - /* Setup Hyp vector base */ + /* + * Setup Hyp vector base. Note they might get updated with the + * branch predictor hardening. + */ WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2); /* Trap Debug and Performance Monitor accesses */ diff -Nru xen-4.9.0/xen/arch/arm/vgic.c xen-4.9.2/xen/arch/arm/vgic.c --- xen-4.9.0/xen/arch/arm/vgic.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/arm/vgic.c 2018-03-28 13:10:55.000000000 +0000 @@ -117,6 +117,13 @@ d->arch.vgic.ctlr = 0; + /* + * The vGIC relies on having a pending_irq available for every IRQ + * described in the ranks. As each rank describes 32 interrupts, we + * need to make sure the number of SPIs is a multiple of 32. 
+ */ + nr_spis = ROUNDUP(nr_spis, 32); + /* Limit the number of virtual SPIs supported to (1020 - 32) = 988 */ if ( nr_spis > (1020 - NR_LOCAL_IRQS) ) return -EINVAL; @@ -181,7 +188,8 @@ } } - d->arch.vgic.handler->domain_free(d); + if ( d->arch.vgic.handler ) + d->arch.vgic.handler->domain_free(d); xfree(d->arch.vgic.shared_irqs); xfree(d->arch.vgic.pending_irqs); xfree(d->arch.vgic.allocated_irqs); diff -Nru xen-4.9.0/xen/arch/x86/acpi/cpu_idle.c xen-4.9.2/xen/arch/x86/acpi/cpu_idle.c --- xen-4.9.0/xen/arch/x86/acpi/cpu_idle.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/acpi/cpu_idle.c 2018-03-28 13:10:55.000000000 +0000 @@ -55,6 +55,7 @@ #include #include #include +#include /*#define DEBUG_PM_CX*/ @@ -342,7 +343,10 @@ printk("'%c' pressed -> printing ACPI Cx structures\n", key); for_each_online_cpu ( cpu ) if (processor_powers[cpu]) + { print_acpi_power(cpu, processor_powers[cpu]); + process_pending_softirqs(); + } } static int __init cpu_idle_key_init(void) @@ -404,8 +408,14 @@ */ if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) { + struct cpu_info *info = get_cpu_info(); + cpumask_set_cpu(cpu, &cpuidle_mwait_flags); + + spec_ctrl_enter_idle(info); __mwait(eax, ecx); + spec_ctrl_exit_idle(info); + cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); } @@ -420,6 +430,8 @@ static void acpi_idle_do_entry(struct acpi_processor_cx *cx) { + struct cpu_info *info = get_cpu_info(); + switch ( cx->entry_method ) { case ACPI_CSTATE_EM_FFH: @@ -427,15 +439,19 @@ acpi_processor_ffh_cstate_enter(cx); return; case ACPI_CSTATE_EM_SYSIO: + spec_ctrl_enter_idle(info); /* IO port based C-state */ inb(cx->address); /* Dummy wait op - must do something useless after P_LVL2 read because chipsets cannot guarantee that STPCLK# signal gets asserted in time to freeze execution properly. */ inl(pmtmr_ioport); + spec_ctrl_exit_idle(info); return; case ACPI_CSTATE_EM_HALT: + spec_ctrl_enter_idle(info); safe_halt(); + spec_ctrl_exit_idle(info); local_irq_disable(); return; } @@ -563,7 +579,13 @@ if ( pm_idle_save ) pm_idle_save(); else + { + struct cpu_info *info = get_cpu_info(); + + spec_ctrl_enter_idle(info); safe_halt(); + spec_ctrl_exit_idle(info); + } return; } @@ -742,6 +764,7 @@ * Otherwise, CPU may still hold dirty data, breaking cache coherency, * leading to strange errors. 
*/ + spec_ctrl_enter_idle(get_cpu_info()); wbinvd(); while ( 1 ) @@ -771,6 +794,7 @@ u32 address = cx->address; u32 pmtmr_ioport_local = pmtmr_ioport; + spec_ctrl_enter_idle(get_cpu_info()); wbinvd(); while ( 1 ) diff -Nru xen-4.9.0/xen/arch/x86/apic.c xen-4.9.2/xen/arch/x86/apic.c --- xen-4.9.0/xen/arch/x86/apic.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/apic.c 2018-03-28 13:10:55.000000000 +0000 @@ -687,7 +687,7 @@ printk("Leaving ESR disabled.\n"); } - if (nmi_watchdog == NMI_LOCAL_APIC) + if (nmi_watchdog == NMI_LOCAL_APIC && smp_processor_id()) setup_apic_nmi_watchdog(); apic_pm_activate(); } @@ -856,7 +856,7 @@ return -1; } - __set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + setup_force_cpu_cap(X86_FEATURE_APIC); mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ diff -Nru xen-4.9.0/xen/arch/x86/boot/head.S xen-4.9.2/xen/arch/x86/boot/head.S --- xen-4.9.0/xen/arch/x86/boot/head.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/boot/head.S 2018-03-28 13:10:55.000000000 +0000 @@ -173,10 +173,11 @@ .Lget_vtb: mov sym_esi(vga_text_buffer),%edi .Lsend_chr: - mov (%esi),%bl - test %bl,%bl # Terminate on '\0' sentinel + lodsb + test %al,%al # Terminate on '\0' sentinel je .Lhalt mov $0x3f8+5,%dx # UART Line Status Register + mov %al,%bl 2: in %dx,%al test $0x20,%al # Test THR Empty flag je 2b @@ -185,7 +186,7 @@ out %al,%dx # Send a character over the serial line test %edi,%edi # Is the VGA text buffer available? jz .Lsend_chr - movsb # Write a character to the VGA text buffer + stosb # Write a character to the VGA text buffer mov $7,%al stosb # Write an attribute to the VGA text buffer jmp .Lsend_chr diff -Nru xen-4.9.0/xen/arch/x86/boot/mem.S xen-4.9.2/xen/arch/x86/boot/mem.S --- xen-4.9.0/xen/arch/x86/boot/mem.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/boot/mem.S 2018-03-28 13:10:55.000000000 +0000 @@ -22,11 +22,10 @@ cmpl $SMAP,%eax # check the return is `SMAP' jne .Lmem88 - movb bootsym(e820nr),%al # up to 128 entries - cmpb $E820_BIOS_MAX,%al + incw bootsym(e820nr) + cmpw $E820_BIOS_MAX,bootsym(e820nr) # up to this many entries jae .Lmem88 - incb bootsym(e820nr) movw %di,%ax addw $20,%ax movw %ax,%di diff -Nru xen-4.9.0/xen/arch/x86/boot/trampoline.S xen-4.9.2/xen/arch/x86/boot/trampoline.S --- xen-4.9.0/xen/arch/x86/boot/trampoline.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/boot/trampoline.S 2018-03-28 13:10:55.000000000 +0000 @@ -153,8 +153,28 @@ .code64 start64: /* Jump to high mappings. */ - movabs $__high_start,%rax - jmpq *%rax + movabs $__high_start, %rdi + +#ifdef CONFIG_INDIRECT_THUNK + /* + * If booting virtualised, or hot-onlining a CPU, sibling threads can + * attempt Branch Target Injection against this jmp. + * + * We've got no usable stack so can't use a RETPOLINE thunk, and are + * further than disp32 from the high mappings so couldn't use + * JUMP_THUNK even if it was a non-RETPOLINE thunk. Furthermore, an + * LFENCE isn't necessarily safe to use at this point. + * + * As this isn't a hotpath, use a fully serialising event to reduce + * the speculation window as much as possible. %ebx needs preserving + * for __high_start. 
+ */ + mov %ebx, %esi + cpuid + mov %esi, %ebx +#endif + + jmpq *%rdi #include "wakeup.S" diff -Nru xen-4.9.0/xen/arch/x86/cpu/amd.c xen-4.9.2/xen/arch/x86/cpu/amd.c --- xen-4.9.0/xen/arch/x86/cpu/amd.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpu/amd.c 2018-03-28 13:10:55.000000000 +0000 @@ -558,8 +558,41 @@ wrmsr_amd_safe(0xc001100d, l, h & ~1); } + /* + * Attempt to set lfence to be Dispatch Serialising. This MSR almost + * certainly isn't virtualised (and Xen at least will leak the real + * value on reads but silently discard writes), as well as being per-core + * rather than per-thread, so do a full safe read/write/readback cycle + * in the worst case. + */ + if (c->x86 == 0x0f || c->x86 == 0x11) + /* Always dispatch serialising on this hardware. */ + __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); + else /* Implicitly "== 0x10 || >= 0x12" by being 64bit. */ { + if (rdmsr_safe(MSR_AMD64_DE_CFG, value)) + /* Unable to read. Assume the safer default. */ + __clear_bit(X86_FEATURE_LFENCE_DISPATCH, + c->x86_capability); + else if (value & AMD64_DE_CFG_LFENCE_SERIALISE) + /* Already dispatch serialising. */ + __set_bit(X86_FEATURE_LFENCE_DISPATCH, + c->x86_capability); + else if (wrmsr_safe(MSR_AMD64_DE_CFG, + value | AMD64_DE_CFG_LFENCE_SERIALISE) || + rdmsr_safe(MSR_AMD64_DE_CFG, value) || + !(value & AMD64_DE_CFG_LFENCE_SERIALISE)) + /* Attempt to set failed. Assume the safer default. */ + __clear_bit(X86_FEATURE_LFENCE_DISPATCH, + c->x86_capability); + else + /* Successfully enabled! */ + __set_bit(X86_FEATURE_LFENCE_DISPATCH, + c->x86_capability); + } + /* MFENCE stops RDTSC speculation */ - __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); + if (!cpu_has_lfence_dispatch) + __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); switch(c->x86) { diff -Nru xen-4.9.0/xen/arch/x86/cpu/common.c xen-4.9.2/xen/arch/x86/cpu/common.c --- xen-4.9.0/xen/arch/x86/cpu/common.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpu/common.c 2018-03-28 13:10:55.000000000 +0000 @@ -54,6 +54,7 @@ u64 host_pat = 0x050100070406; static unsigned int cleared_caps[NCAPINTS]; +static unsigned int forced_caps[NCAPINTS]; void __init setup_clear_cpu_cap(unsigned int cap) { @@ -63,6 +64,10 @@ if (__test_and_set_bit(cap, cleared_caps)) return; + if (test_bit(cap, forced_caps)) + printk("%pS clearing previously forced feature %#x\n", + __builtin_return_address(0), cap); + __clear_bit(cap, boot_cpu_data.x86_capability); dfs = lookup_deep_deps(cap); @@ -72,9 +77,28 @@ for (i = 0; i < FSCAPINTS; ++i) { cleared_caps[i] |= dfs[i]; boot_cpu_data.x86_capability[i] &= ~dfs[i]; + if (!(forced_caps[i] & dfs[i])) + continue; + printk("%pS implicitly clearing previously forced feature(s) %u:%#x\n", + __builtin_return_address(0), + i, forced_caps[i] & dfs[i]); } } +void __init setup_force_cpu_cap(unsigned int cap) +{ + if (__test_and_set_bit(cap, forced_caps)) + return; + + if (test_bit(cap, cleared_caps)) { + printk("%pS tries to force previously cleared feature %#x\n", + __builtin_return_address(0), cap); + return; + } + + __set_bit(cap, boot_cpu_data.x86_capability); +} + static void default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ @@ -375,8 +399,10 @@ for (i = 0; i < FSCAPINTS; ++i) c->x86_capability[i] &= known_features[i]; - for (i = 0 ; i < NCAPINTS ; ++i) + for (i = 0 ; i < NCAPINTS ; ++i) { + c->x86_capability[i] |= forced_caps[i]; c->x86_capability[i] &= ~cleared_caps[i]; + } /* If the model name is still unset, do table lookup.
*/ if ( !c->x86_model_id[0] ) { @@ -453,8 +479,8 @@ initial_apicid = edx; /* Populate HT related information from sub-leaf level 0 */ - core_level_siblings = c->x86_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + core_level_siblings = c->x86_num_siblings = 1u << ht_mask_width; sub_index = 1; do { @@ -462,8 +488,8 @@ /* Check for the Core type in the implemented sub leaves */ if ( LEAFB_SUBTYPE(ecx) == CORE_TYPE ) { - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + core_level_siblings = 1u << core_plus_mask_width; break; } @@ -614,6 +640,7 @@ * - Sets up TSS with stack pointers, including ISTs * - Inserts TSS selector into regular and compat GDTs * - Loads GDT, IDT, TR then null LDT + * - Sets up IST references in the IDT */ void load_system_tables(void) { @@ -676,6 +703,10 @@ asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) ); asm volatile ("lldt %w0" : : "rm" (0) ); + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); + /* * Bottom-of-stack must be 16-byte aligned! * diff -Nru xen-4.9.0/xen/arch/x86/cpu/intel.c xen-4.9.2/xen/arch/x86/cpu/intel.c --- xen-4.9.0/xen/arch/x86/cpu/intel.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpu/intel.c 2018-03-28 13:10:55.000000000 +0000 @@ -21,13 +21,22 @@ { uint64_t x; - if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, x) || - !(x & MSR_PLATFORM_INFO_CPUID_FAULTING)) + if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, x)) return 0; + setup_force_cpu_cap(X86_FEATURE_MSR_PLATFORM_INFO); + + if (!(x & MSR_PLATFORM_INFO_CPUID_FAULTING)) { + if (!rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, x)) + setup_force_cpu_cap(X86_FEATURE_MSR_MISC_FEATURES); + return 0; + } + + setup_force_cpu_cap(X86_FEATURE_MSR_MISC_FEATURES); + expected_levelling_cap |= LCAP_faulting; levelling_caps |= LCAP_faulting; - __set_bit(X86_FEATURE_CPUID_FAULTING, boot_cpu_data.x86_capability); + setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING); return 1; } @@ -320,9 +329,6 @@ if (c == &boot_cpu_data) intel_init_levelling(); - if (test_bit(X86_FEATURE_CPUID_FAULTING, boot_cpu_data.x86_capability)) - __set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability); - intel_ctxt_switch_levelling(NULL); } diff -Nru xen-4.9.0/xen/arch/x86/cpu/mwait-idle.c xen-4.9.2/xen/arch/x86/cpu/mwait-idle.c --- xen-4.9.0/xen/arch/x86/cpu/mwait-idle.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpu/mwait-idle.c 2018-03-28 13:10:55.000000000 +0000 @@ -58,6 +58,7 @@ #include #include #include +#include #include #define MWAIT_IDLE_VERSION "0.4.1" @@ -736,7 +737,13 @@ if (pm_idle_save) pm_idle_save(); else + { + struct cpu_info *info = get_cpu_info(); + + spec_ctrl_enter_idle(info); safe_halt(); + spec_ctrl_exit_idle(info); + } return; } diff -Nru xen-4.9.0/xen/arch/x86/cpu/vpmu.c xen-4.9.2/xen/arch/x86/cpu/vpmu.c --- xen-4.9.0/xen/arch/x86/cpu/vpmu.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpu/vpmu.c 2018-03-28 13:10:55.000000000 +0000 @@ -226,10 +226,6 @@ if ( !vpmu->xenpmu_data ) return; - if ( !(vpmu_mode & XENPMU_MODE_ALL) && - !vpmu->arch_vpmu_ops->do_interrupt(regs) ) - return; - if ( vpmu_is_set(vpmu, VPMU_CACHED) ) return; diff -Nru xen-4.9.0/xen/arch/x86/cpuid.c xen-4.9.2/xen/arch/x86/cpuid.c --- xen-4.9.0/xen/arch/x86/cpuid.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/cpuid.c 2018-03-28 13:10:55.000000000 +0000 @@ -18,6 +18,41
@@ static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES; static const uint32_t deep_features[] = INIT_DEEP_FEATURES; +static int __init parse_xen_cpuid(const char *s) +{ + const char *ss; + int val, rc = 0; + + do { + ss = strchr(s, ','); + if ( !ss ) + ss = strchr(s, '\0'); + + if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + { + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_IBPB); + } + else if ( (val = parse_boolean("ibrsb", s, ss)) >= 0 ) + { + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_IBRSB); + } + else if ( (val = parse_boolean("stibp", s, ss)) >= 0 ) + { + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_STIBP); + } + else + rc = -EINVAL; + + s = ss + 1; + } while ( *ss ); + + return rc; +} +custom_param("cpuid", parse_xen_cpuid); + #define EMPTY_LEAF ((struct cpuid_leaf){}) static void zero_leaves(struct cpuid_leaf *l, unsigned int first, unsigned int last) @@ -354,6 +389,16 @@ /* Unconditionally claim to be able to set the hypervisor bit. */ __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset); + /* On hardware with IBRS/IBPB support, there are further adjustments. */ + if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) ) + { + /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ + __set_bit(X86_FEATURE_STIBP, pv_featureset); + + /* AMD's IBPB is a subset of IBRS/IBPB. */ + __set_bit(X86_FEATURE_IBPB, pv_featureset); + } + sanitise_featureset(pv_featureset); cpuid_featureset_to_policy(pv_featureset, p); recalculate_xstate(p); @@ -410,6 +455,16 @@ __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); } + /* On hardware with IBRS/IBPB support, there are further adjustments. */ + if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) ) + { + /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ + __set_bit(X86_FEATURE_STIBP, hvm_featureset); + + /* AMD's IBPB is a subset of IBRS/IBPB. */ + __set_bit(X86_FEATURE_IBPB, hvm_featureset); + } + sanitise_featureset(hvm_featureset); cpuid_featureset_to_policy(hvm_featureset, p); recalculate_xstate(p); @@ -551,6 +606,14 @@ recalculate_xstate(p); recalculate_misc(p); + /* + * Override STIBP to match IBRS. Guests can safely use STIBP + * functionality on non-HT hardware, but can't necessarily protect + * themselves from SP2/Spectre/Branch Target Injection if STIBP is hidden + * on HT-capable hardware. + */ + p->feat.stibp = p->feat.ibrsb; + for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) { if ( p->cache.subleaf[i].type >= 1 && diff -Nru xen-4.9.0/xen/arch/x86/domain.c xen-4.9.2/xen/arch/x86/domain.c --- xen-4.9.0/xen/arch/x86/domain.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/domain.c 2018-03-28 13:10:55.000000000 +0000 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct vcpu *, curr_vcpu); @@ -75,9 +77,15 @@ static void default_idle(void) { + struct cpu_info *info = get_cpu_info(); + local_irq_disable(); if ( cpu_is_haltable(smp_processor_id()) ) + { + spec_ctrl_enter_idle(info); safe_halt(); + spec_ctrl_exit_idle(info); + } else local_irq_enable(); } @@ -89,6 +97,7 @@ * held by the CPUs spinning here indefinitely, and get discarded by * a subsequent INIT.
*/ + spec_ctrl_enter_idle(get_cpu_info()); wbinvd(); for ( ; ; ) halt(); @@ -148,7 +157,6 @@ static void noreturn continue_nonidle_domain(struct vcpu *v) { check_wakeup_from_wait(); - mark_regs_dirty(guest_cpu_user_regs()); reset_stack_and_jump(ret_from_intr); } @@ -467,8 +475,12 @@ xfree(v->arch.pv_vcpu.trap_ctxt); } else if ( !is_idle_domain(v->domain) ) + { vpmu_initialise(v); + cpuid_policy_updated(v); + } + return rc; } @@ -502,7 +514,7 @@ if ( is_hardware_domain(d) && emflags != (XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC) ) return false; - if ( !is_hardware_domain(d) && emflags && + if ( !is_hardware_domain(d) && emflags != XEN_X86_EMU_ALL && emflags != XEN_X86_EMU_LAPIC ) return false; } @@ -1226,6 +1238,7 @@ rc = -ERESTART; /* Fallthrough */ case -ERESTART: + v->arch.old_guest_ptpg = NULL; v->arch.old_guest_table = pagetable_get_page(v->arch.guest_table); v->arch.guest_table = pagetable_null(); @@ -1663,6 +1676,18 @@ return rc; } +/* + * Loading a nul selector does not clear bases and limits on AMD CPUs. Be on + * the safe side and re-initialize both to flat segment values before loading + * a nul selector. + */ +#define preload_segment(seg, value) do { \ + if ( !((value) & ~3) && \ + boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) \ + asm volatile ( "movl %k0, %%" #seg \ + :: "r" (FLAT_USER_DS32) ); \ +} while ( false ) + #define loadsegment(seg,value) ({ \ int __r = 1; \ asm volatile ( \ @@ -1701,36 +1726,40 @@ /* Either selector != 0 ==> reload. */ if ( unlikely((dirty_segment_mask & DIRTY_DS) | uregs->ds) ) + { + preload_segment(ds, uregs->ds); all_segs_okay &= loadsegment(ds, uregs->ds); + } /* Either selector != 0 ==> reload. */ if ( unlikely((dirty_segment_mask & DIRTY_ES) | uregs->es) ) + { + preload_segment(es, uregs->es); all_segs_okay &= loadsegment(es, uregs->es); + } - /* - * Either selector != 0 ==> reload. - * Also reload to reset FS_BASE if it was non-zero. - */ - if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) | - uregs->fs) ) + /* Either selector != 0 ==> reload. */ + if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) ) + { all_segs_okay &= loadsegment(fs, uregs->fs); + /* non-nul selector updates fs_base */ + if ( uregs->fs & ~3 ) + dirty_segment_mask &= ~DIRTY_FS_BASE; + } - /* - * Either selector != 0 ==> reload. - * Also reload to reset GS_BASE if it was non-zero. - */ - if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) | - uregs->gs) ) + /* Either selector != 0 ==> reload. */ + if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) ) { - /* Reset GS_BASE with user %gs? */ - if ( (dirty_segment_mask & DIRTY_GS) || !n->arch.pv_vcpu.gs_base_user ) - all_segs_okay &= loadsegment(gs, uregs->gs); + all_segs_okay &= loadsegment(gs, uregs->gs); + /* non-nul selector updates gs_base_user */ + if ( uregs->gs & ~3 ) + dirty_segment_mask &= ~DIRTY_GS_BASE_USER; } if ( !is_pv_32bit_vcpu(n) ) { /* This can only be non-zero if selector is NULL. */ - if ( n->arch.pv_vcpu.fs_base ) + if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) ) wrfsbase(n->arch.pv_vcpu.fs_base); /* Most kernels have non-zero GS base, so don't bother testing. */ @@ -1738,7 +1767,8 @@ wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel); /* This can only be non-zero if selector is NULL. */ - if ( n->arch.pv_vcpu.gs_base_user ) + if ( n->arch.pv_vcpu.gs_base_user | + (dirty_segment_mask & DIRTY_GS_BASE_USER) ) wrgsbase(n->arch.pv_vcpu.gs_base_user); /* If in kernel mode then switch the GS bases around. 
*/ @@ -1873,22 +1903,23 @@ if ( regs->fs || is_pv_32bit_vcpu(v) ) { dirty_segment_mask |= DIRTY_FS; - v->arch.pv_vcpu.fs_base = 0; /* != 0 selector kills fs_base */ + /* non-nul selector kills fs_base */ + if ( regs->fs & ~3 ) + v->arch.pv_vcpu.fs_base = 0; } - else if ( v->arch.pv_vcpu.fs_base ) - { + if ( v->arch.pv_vcpu.fs_base ) dirty_segment_mask |= DIRTY_FS_BASE; - } if ( regs->gs || is_pv_32bit_vcpu(v) ) { dirty_segment_mask |= DIRTY_GS; - v->arch.pv_vcpu.gs_base_user = 0; /* != 0 selector kills gs_base_user */ + /* non-nul selector kills gs_base_user */ + if ( regs->gs & ~3 ) + v->arch.pv_vcpu.gs_base_user = 0; } - else if ( v->arch.pv_vcpu.gs_base_user ) - { + if ( v->arch.flags & TF_kernel_mode ? v->arch.pv_vcpu.gs_base_kernel + : v->arch.pv_vcpu.gs_base_user ) dirty_segment_mask |= DIRTY_GS_BASE_USER; - } this_cpu(dirty_segment_mask) = dirty_segment_mask; } @@ -1911,10 +1942,16 @@ static void paravirt_ctxt_switch_to(struct vcpu *v) { + root_pgentry_t *root_pgt = this_cpu(root_pgt); unsigned long cr4; switch_kernel_stack(v); + if ( root_pgt ) + root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] = + l4e_from_page(v->domain->arch.perdomain_l3_pg, + __PAGE_HYPERVISOR_RW); + cr4 = pv_guest_cr4_to_real_cr4(v); if ( unlikely(cr4 != read_cr4()) ) write_cr4(cr4); @@ -1922,9 +1959,9 @@ if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) activate_debugregs(v); - if ( (v->domain->arch.tsc_mode == TSC_MODE_PVRDTSCP) && - boot_cpu_has(X86_FEATURE_RDTSCP) ) - write_rdtscp_aux(v->domain->arch.incarnation); + if ( cpu_has_rdtscp ) + wrmsr_tsc_aux(v->domain->arch.tsc_mode == TSC_MODE_PVRDTSCP + ? v->domain->arch.incarnation : 0); } /* Update per-VCPU guest runstate shared memory area (if registered). */ @@ -2084,6 +2121,8 @@ ASSERT(local_irq_is_enabled()); + get_cpu_info()->xen_cr3 = 0; + cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask); /* Allow at most one CPU at a time to be dirty. */ ASSERT(cpumask_weight(&dirty_mask) <= 1); @@ -2136,6 +2175,34 @@ } ctxt_switch_levelling(next); + + if ( opt_ibpb && !is_idle_domain(nextd) ) + { + static DEFINE_PER_CPU(unsigned int, last); + unsigned int *last_id = &this_cpu(last); + + /* + * Squash the domid and vcpu id together for comparison + * efficiency. We could in principle stash and compare the struct + * vcpu pointer, but this risks a false alias if a domain has died + * and the same 4k page gets reused for a new vcpu. + */ + unsigned int next_id = (((unsigned int)nextd->domain_id << 16) | + (uint16_t)next->vcpu_id); + BUILD_BUG_ON(MAX_VIRT_CPUS > 0xffff); + + /* + * When scheduling from a vcpu, to idle, and back to the same vcpu + * (which might be common in a lightly loaded system, or when + * using vcpu pinning), there is no need to issue IBPB, as we are + * returning to the same security context. + */ + if ( *last_id != next_id ) + { + wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); + *last_id = next_id; + } + } } context_saved(prev); @@ -2426,6 +2493,16 @@ return 0; } +/* + * Called during vcpu construction, and each time the toolstack changes the + * CPUID configuration for the domain. 
+ */ +void cpuid_policy_updated(struct vcpu *v) +{ + if ( is_hvm_vcpu(v) ) + hvm_cpuid_policy_changed(v); +} + void arch_dump_domain_info(struct domain *d) { paging_dump_domain_info(d); diff -Nru xen-4.9.0/xen/arch/x86/domctl.c xen-4.9.2/xen/arch/x86/domctl.c --- xen-4.9.0/xen/arch/x86/domctl.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/domctl.c 2018-03-28 13:10:55.000000000 +0000 @@ -53,6 +53,8 @@ struct cpuid_policy *p = d->arch.cpuid; const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx }; int old_vendor = p->x86_vendor; + unsigned int old_7d0 = p->feat.raw[0].d, old_e8b = p->extd.raw[8].b; + bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */ /* * Skip update for leaves we don't care about. This avoids the overhead @@ -128,13 +130,7 @@ switch ( ctl->input[0] ) { case 0: - if ( is_hvm_domain(d) && (p->x86_vendor != old_vendor) ) - { - struct vcpu *v; - - for_each_vcpu( d, v ) - hvm_update_guest_vendor(v); - } + call_policy_changed = (p->x86_vendor != old_vendor); break; case 1: @@ -223,6 +219,14 @@ d->arch.pv_domain.cpuidmasks->_7ab0 = mask; } + + /* + * If the IBRS/IBPB policy has changed, we need to recalculate the MSR + * interception bitmaps. + */ + call_policy_changed = (is_hvm_domain(d) && + ((old_7d0 ^ p->feat.raw[0].d) & + cpufeat_mask(X86_FEATURE_IBRSB))); break; case 0xa: @@ -297,6 +301,24 @@ d->arch.pv_domain.cpuidmasks->e1cd = mask; } break; + + case 0x80000008: + /* + * If the IBPB policy has changed, we need to recalculate the MSR + * interception bitmaps. + */ + call_policy_changed = (is_hvm_domain(d) && + ((old_e8b ^ p->extd.raw[8].b) & + cpufeat_mask(X86_FEATURE_IBPB))); + break; + } + + if ( call_policy_changed ) + { + struct vcpu *v; + + for_each_vcpu( d, v ) + cpuid_policy_updated(v); } return 0; @@ -1275,6 +1297,8 @@ /* Count maximum number of optional msrs. */ if ( boot_cpu_has(X86_FEATURE_DBEXT) ) nr_msrs += 4; + nr_msrs += !!cpu_has_cpuid_faulting + + d->arch.cpuid->feat.ibrsb; if ( domctl->cmd == XEN_DOMCTL_get_vcpu_msrs ) { @@ -1322,6 +1346,32 @@ } } + if ( v->arch.cpuid_faulting ) + { + if ( i < vmsrs->msr_count && !ret ) + { + msr.index = MSR_INTEL_MISC_FEATURES_ENABLES; + msr.reserved = 0; + msr.value = MSR_MISC_FEATURES_CPUID_FAULTING; + if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) ) + ret = -EFAULT; + } + ++i; + } + + if ( d->arch.cpuid->feat.ibrsb && v->arch.spec_ctrl ) + { + if ( i < vmsrs->msr_count && !ret ) + { + msr.index = MSR_SPEC_CTRL; + msr.reserved = 0; + msr.value = v->arch.spec_ctrl; + if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) ) + ret = -EFAULT; + } + ++i; + } + vcpu_unpause(v); if ( i > vmsrs->msr_count && !ret ) @@ -1349,6 +1399,25 @@ switch ( msr.index ) { + case MSR_SPEC_CTRL: + if ( !d->arch.cpuid->feat.ibrsb ) + break; /* MSR available? */ + + /* + * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. + * ignored) when STIBP isn't enumerated in hardware. + */ + + if ( msr.value & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) + break; + v->arch.spec_ctrl = msr.value; + continue; + + case MSR_INTEL_MISC_FEATURES_ENABLES: + v->arch.cpuid_faulting = !!(msr.value & + MSR_MISC_FEATURES_CPUID_FAULTING); + continue; + case MSR_AMD64_DR0_ADDRESS_MASK: if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr.value >> 32) ) @@ -1489,8 +1558,6 @@ bool_t compat = is_pv_32bit_domain(d); #define c(fld) (!compat ? 
(c.nat->fld) : (c.cmp->fld)) - if ( !is_pv_domain(d) ) - memset(c.nat, 0, sizeof(*c.nat)); memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt)); c(flags = v->arch.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel)); if ( v->fpu_initialised ) diff -Nru xen-4.9.0/xen/arch/x86/efi/efi-boot.h xen-4.9.2/xen/arch/x86/efi/efi-boot.h --- xen-4.9.0/xen/arch/x86/efi/efi-boot.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/efi/efi-boot.h 2018-03-28 13:10:55.000000000 +0000 @@ -87,7 +87,8 @@ case PE_BASE_RELOC_DIR64: if ( in_page_tables(addr) ) blexit(L"Unexpected relocation type"); - *(u64 *)addr += delta; + if ( delta ) + *(u64 *)addr += delta; break; default: blexit(L"Unsupported relocation type"); @@ -667,7 +668,7 @@ return 1; /* x86 always uses a config file */ } -static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { } +static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { } void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) { diff -Nru xen-4.9.0/xen/arch/x86/extable.c xen-4.9.2/xen/arch/x86/extable.c --- xen-4.9.0/xen/arch/x86/extable.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/extable.c 2018-03-28 13:10:55.000000000 +0000 @@ -158,7 +158,7 @@ memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc)); unmap_domain_page(ptr); - asm volatile ( "call *%[stb]\n" + asm volatile ( "INDIRECT_CALL %[stb]\n" ".Lret%=:\n\t" ".pushsection .fixup,\"ax\"\n" ".Lfix%=:\n\t" @@ -167,7 +167,7 @@ ".popsection\n\t" _ASM_EXTABLE(.Lret%=, .Lfix%=) : [exn] "+m" (res) - : [stb] "rm" (addr), "a" (tests[i].rax)); + : [stb] "r" (addr), "a" (tests[i].rax)); ASSERT(res == tests[i].res.raw); } diff -Nru xen-4.9.0/xen/arch/x86/hvm/dm.c xen-4.9.2/xen/arch/x86/hvm/dm.c --- xen-4.9.0/xen/arch/x86/hvm/dm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/dm.c 2018-03-28 13:10:55.000000000 +0000 @@ -53,42 +53,10 @@ offset_bytes, dst_bytes); } -static bool _raw_copy_to_guest_buf_offset(const struct dmop_args *args, - unsigned int buf_idx, - size_t offset_bytes, - const void *src, - size_t src_bytes) -{ - size_t buf_bytes; - - if ( buf_idx >= args->nr_bufs ) - return false; - - buf_bytes = args->buf[buf_idx].size; - - - if ( (offset_bytes + src_bytes) < offset_bytes || - (offset_bytes + src_bytes) > buf_bytes ) - return false; - - return !copy_to_guest_offset(args->buf[buf_idx].h, offset_bytes, - src, src_bytes); -} - #define COPY_FROM_GUEST_BUF_OFFSET(dst, bufs, buf_idx, offset_bytes) \ _raw_copy_from_guest_buf_offset(&(dst), bufs, buf_idx, offset_bytes, \ sizeof(dst)) -#define COPY_TO_GUEST_BUF_OFFSET(bufs, buf_idx, offset_bytes, src) \ - _raw_copy_to_guest_buf_offset(bufs, buf_idx, offset_bytes, \ - &(src), sizeof(src)) - -#define COPY_FROM_GUEST_BUF(dst, bufs, buf_idx) \ - COPY_FROM_GUEST_BUF_OFFSET(dst, bufs, buf_idx, 0) - -#define COPY_TO_GUEST_BUF(bufs, buf_idx, src) \ - COPY_TO_GUEST_BUF_OFFSET(bufs, buf_idx, 0, src) - static int track_dirty_vram(struct domain *d, xen_pfn_t first_pfn, unsigned int nr, const struct xen_dm_op_buf *buf) { @@ -178,11 +146,9 @@ struct xen_dm_op_modified_memory_extent extent; unsigned int batch_nr; xen_pfn_t pfn, end_pfn; - int rc; - rc = COPY_FROM_GUEST_BUF_OFFSET(extent, - bufs, EXTENTS_BUFFER, (*rem_extents - 1) * sizeof(extent)); - if ( rc ) + if ( !COPY_FROM_GUEST_BUF_OFFSET(extent, bufs, EXTENTS_BUFFER, + (*rem_extents - 1) * sizeof(extent)) ) return -EFAULT; if ( extent.pad ) @@ -373,6 +339,25 @@ struct xen_dm_op op; bool const_op = true; long rc; + size_t 
offset; + + static const uint8_t op_size[] = { + [XEN_DMOP_create_ioreq_server] = sizeof(struct xen_dm_op_create_ioreq_server), + [XEN_DMOP_get_ioreq_server_info] = sizeof(struct xen_dm_op_get_ioreq_server_info), + [XEN_DMOP_map_io_range_to_ioreq_server] = sizeof(struct xen_dm_op_ioreq_server_range), + [XEN_DMOP_unmap_io_range_from_ioreq_server] = sizeof(struct xen_dm_op_ioreq_server_range), + [XEN_DMOP_set_ioreq_server_state] = sizeof(struct xen_dm_op_set_ioreq_server_state), + [XEN_DMOP_destroy_ioreq_server] = sizeof(struct xen_dm_op_destroy_ioreq_server), + [XEN_DMOP_track_dirty_vram] = sizeof(struct xen_dm_op_track_dirty_vram), + [XEN_DMOP_set_pci_intx_level] = sizeof(struct xen_dm_op_set_pci_intx_level), + [XEN_DMOP_set_isa_irq_level] = sizeof(struct xen_dm_op_set_isa_irq_level), + [XEN_DMOP_set_pci_link_route] = sizeof(struct xen_dm_op_set_pci_link_route), + [XEN_DMOP_modified_memory] = sizeof(struct xen_dm_op_modified_memory), + [XEN_DMOP_set_mem_type] = sizeof(struct xen_dm_op_set_mem_type), + [XEN_DMOP_inject_event] = sizeof(struct xen_dm_op_inject_event), + [XEN_DMOP_inject_msi] = sizeof(struct xen_dm_op_inject_msi), + [XEN_DMOP_map_mem_type_to_ioreq_server] = sizeof(struct xen_dm_op_map_mem_type_to_ioreq_server), + }; rc = rcu_lock_remote_domain_by_id(op_args->domid, &d); if ( rc ) @@ -385,12 +370,28 @@ if ( rc ) goto out; - if ( !COPY_FROM_GUEST_BUF(op, op_args, 0) ) + offset = offsetof(struct xen_dm_op, u); + + rc = -EFAULT; + if ( op_args->buf[0].size < offset ) + goto out; + + if ( copy_from_guest_offset((void *)&op, op_args->buf[0].h, 0, offset) ) + goto out; + + if ( op.op >= ARRAY_SIZE(op_size) ) { - rc = -EFAULT; + rc = -EOPNOTSUPP; goto out; } + if ( op_args->buf[0].size < offset + op_size[op.op] ) + goto out; + + if ( copy_from_guest_offset((void *)&op.u, op_args->buf[0].h, offset, + op_size[op.op]) ) + goto out; + rc = -EINVAL; if ( op.pad ) goto out; @@ -637,7 +638,8 @@ } if ( (!rc || rc == -ERESTART) && - !const_op && !COPY_TO_GUEST_BUF(op_args, 0, op) ) + !const_op && copy_to_guest_offset(op_args->buf[0].h, offset, + (void *)&op.u, op_size[op.op]) ) rc = -EFAULT; out: diff -Nru xen-4.9.0/xen/arch/x86/hvm/emulate.c xen-4.9.2/xen/arch/x86/hvm/emulate.c --- xen-4.9.0/xen/arch/x86/hvm/emulate.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/emulate.c 2018-03-28 13:10:55.000000000 +0000 @@ -129,7 +129,7 @@ .count = *reps, .dir = dir, .df = df, - .data = data, + .data = data_is_addr ? data : 0, .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */ .state = STATE_IOREQ_READY, }; @@ -566,15 +566,16 @@ if ( pfec & (PFEC_page_paged | PFEC_page_shared) ) return X86EMUL_RETRY; done /= bytes_per_rep; - *reps = done; if ( done == 0 ) { ASSERT(!reverse); if ( npfn != gfn_x(INVALID_GFN) ) return X86EMUL_UNHANDLEABLE; + *reps = 0; x86_emul_pagefault(pfec, addr & PAGE_MASK, &hvmemul_ctxt->ctxt); return X86EMUL_EXCEPTION; } + *reps = done; break; } @@ -939,7 +940,8 @@ { struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip; + /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */ + uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip; /* * Fall back if requested bytes are not in the prefetch cache. @@ -953,7 +955,17 @@ if ( rc == X86EMUL_OKAY && bytes ) { - ASSERT(insn_off + bytes <= sizeof(hvmemul_ctxt->insn_buf)); + /* + * Will we overflow insn_buf[]? 
This shouldn't be able to happen, + * which means something went wrong with instruction decoding... + */ + if ( insn_off >= sizeof(hvmemul_ctxt->insn_buf) || + insn_off + bytes > sizeof(hvmemul_ctxt->insn_buf) ) + { + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } + memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes); hvmemul_ctxt->insn_buf_bytes = insn_off + bytes; } @@ -1939,20 +1951,22 @@ vio->mmio_retry = 0; - rc = x86_emulate(&hvmemul_ctxt->ctxt, ops); - - if ( rc == X86EMUL_OKAY && vio->mmio_retry ) - rc = X86EMUL_RETRY; - if ( rc != X86EMUL_RETRY ) + switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) ) { + case X86EMUL_OKAY: + if ( vio->mmio_retry ) + rc = X86EMUL_RETRY; + /* fall through */ + default: vio->mmio_cache_count = 0; vio->mmio_insn_bytes = 0; - } - else - { + break; + + case X86EMUL_RETRY: BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf)); vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes; memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes); + break; } if ( hvmemul_ctxt->ctxt.retire.singlestep ) diff -Nru xen-4.9.0/xen/arch/x86/hvm/hpet.c xen-4.9.2/xen/arch/x86/hvm/hpet.c --- xen-4.9.0/xen/arch/x86/hvm/hpet.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/hpet.c 2018-03-28 13:10:55.000000000 +0000 @@ -635,14 +635,10 @@ HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM); -void hpet_init(struct domain *d) +static void hpet_set(HPETState *h) { - HPETState *h = domain_vhpet(d); int i; - if ( !has_vhpet(d) ) - return; - memset(h, 0, sizeof(HPETState)); rwlock_init(&h->lock); @@ -668,7 +664,14 @@ h->hpet.comparator64[i] = ~0ULL; h->pt[i].source = PTSRC_isa; } +} + +void hpet_init(struct domain *d) +{ + if ( !has_vhpet(d) ) + return; + hpet_set(domain_vhpet(d)); register_mmio_handler(d, &hpet_mmio_ops); d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1; } @@ -697,8 +700,11 @@ void hpet_reset(struct domain *d) { + if ( !has_vhpet(d) ) + return; + hpet_deinit(d); - hpet_init(d); + hpet_set(domain_vhpet(d)); } /* diff -Nru xen-4.9.0/xen/arch/x86/hvm/hvm.c xen-4.9.2/xen/arch/x86/hvm/hvm.c --- xen-4.9.0/xen/arch/x86/hvm/hvm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/hvm.c 2018-03-28 13:10:55.000000000 +0000 @@ -1323,6 +1323,7 @@ for_each_vcpu ( d, v ) { + struct hvm_save_descriptor *d = _p(&h->data[h->cur]); struct hvm_msr *ctxt; unsigned int i; @@ -1341,8 +1342,13 @@ ctxt->msr[i]._rsvd = 0; if ( ctxt->count ) + { + /* Rewrite length to indicate how much space we actually used. */ + d->length = HVM_CPU_MSR_SIZE(ctxt->count); h->cur += HVM_CPU_MSR_SIZE(ctxt->count); + } else + /* or rewind and remove the descriptor from the stream. */ h->cur -= sizeof(struct hvm_save_descriptor); } @@ -1503,8 +1509,6 @@ hvm_set_guest_tsc(v, 0); } - hvm_update_guest_vendor(v); - return 0; fail6: @@ -3418,6 +3422,22 @@ goto gp_fault; break; + case MSR_AMD_PATCHLOADER: + case MSR_IA32_UCODE_WRITE: + case MSR_PRED_CMD: + /* Write-only */ + goto gp_fault; + + case MSR_SPEC_CTRL: + if ( !d->arch.cpuid->feat.ibrsb ) + goto gp_fault; + *msr_content = v->arch.spec_ctrl; + break; + + case MSR_ARCH_CAPABILITIES: + /* Not implemented yet. 
*/ + goto gp_fault; + case MSR_K8_ENABLE_C1E: case MSR_AMD64_NB_CFG: /* @@ -3493,7 +3513,7 @@ v->arch.hvm_vcpu.msr_tsc_aux = (uint32_t)msr_content; if ( cpu_has_rdtscp && (v->domain->arch.tsc_mode != TSC_MODE_PVRDTSCP) ) - wrmsrl(MSR_TSC_AUX, (uint32_t)msr_content); + wrmsr_tsc_aux(msr_content); break; case MSR_IA32_APICBASE: @@ -3557,6 +3577,26 @@ goto gp_fault; break; + case MSR_AMD_PATCHLOADER: + /* + * See note on MSR_IA32_UCODE_WRITE below, which may or may not apply + * to AMD CPUs as well (at least the architectural/CPUID part does). + */ + if ( d->arch.cpuid->x86_vendor != X86_VENDOR_AMD ) + goto gp_fault; + break; + + case MSR_IA32_UCODE_WRITE: + /* + * Some versions of Windows at least on certain hardware try to load + * microcode before setting up an IDT. Therefore we must not inject #GP + * for such attempts. Also the MSR is architectural and not qualified + * by any CPUID bit. + */ + if ( d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL ) + goto gp_fault; + break; + case MSR_IA32_XSS: /* No XSS features currently supported for guests. */ if ( !d->arch.cpuid->xstate.xsaves || msr_content != 0 ) @@ -3570,6 +3610,37 @@ goto gp_fault; break; + case MSR_SPEC_CTRL: + if ( !d->arch.cpuid->feat.ibrsb ) + goto gp_fault; /* MSR available? */ + + /* + * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored) + * when STIBP isn't enumerated in hardware. + */ + + if ( msr_content & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) + goto gp_fault; /* Rsvd bit set? */ + + v->arch.spec_ctrl = msr_content; + break; + + case MSR_PRED_CMD: + if ( !d->arch.cpuid->feat.ibrsb && !d->arch.cpuid->extd.ibpb ) + goto gp_fault; /* MSR available? */ + + /* + * The only defined behaviour is when writing PRED_CMD_IBPB. In + * practice, real hardware accepts any value without faulting. + */ + if ( msr_content & PRED_CMD_IBPB ) + wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); + break; + + case MSR_ARCH_CAPABILITIES: + /* Read-only */ + goto gp_fault; + case MSR_AMD64_NB_CFG: /* ignore the write */ break; @@ -3979,6 +4050,7 @@ printk(XENLOG_G_INFO "%pv: upcall vector %02x\n", v, op.vector); v->arch.hvm_vcpu.evtchn_upcall_vector = op.vector; + hvm_assert_evtchn_irq(v); return 0; } @@ -4484,12 +4556,18 @@ if ( a.u.enable_notify.pad || a.domain != DOMID_SELF || a.u.enable_notify.vcpu_id != curr->vcpu_id ) + { rc = -EINVAL; + break; + } if ( !gfn_eq(vcpu_altp2m(curr).veinfo_gfn, INVALID_GFN) || mfn_eq(get_gfn_query_unlocked(curr->domain, a.u.enable_notify.gfn, &p2mt), INVALID_MFN) ) - return -EINVAL; + { + rc = -EINVAL; + break; + } vcpu_altp2m(curr).veinfo_gfn = _gfn(a.u.enable_notify.gfn); altp2m_vcpu_update_vmfunc_ve(curr); diff -Nru xen-4.9.0/xen/arch/x86/hvm/intercept.c xen-4.9.2/xen/arch/x86/hvm/intercept.c --- xen-4.9.0/xen/arch/x86/hvm/intercept.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/intercept.c 2018-03-28 13:10:55.000000000 +0000 @@ -127,6 +127,7 @@ addr = (p->type == IOREQ_TYPE_COPY) ? 
p->addr + step * i : p->addr; + data = 0; rc = ops->read(handler, addr, p->size, &data); if ( rc != X86EMUL_OKAY ) break; @@ -161,6 +162,7 @@ { if ( p->data_is_ptr ) { + data = 0; switch ( hvm_copy_from_guest_phys(&data, p->data + step * i, p->size) ) { diff -Nru xen-4.9.0/xen/arch/x86/hvm/io.c xen-4.9.2/xen/arch/x86/hvm/io.c --- xen-4.9.0/xen/arch/x86/hvm/io.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/io.c 2018-03-28 13:10:55.000000000 +0000 @@ -88,7 +88,7 @@ rc = hvm_emulate_one(&ctxt); - if ( hvm_vcpu_io_need_completion(vio) || vio->mmio_retry ) + if ( hvm_vcpu_io_need_completion(vio) ) vio->io_completion = HVMIO_mmio_completion; else vio->mmio_access = (struct npfec){}; diff -Nru xen-4.9.0/xen/arch/x86/hvm/ioreq.c xen-4.9.2/xen/arch/x86/hvm/ioreq.c --- xen-4.9.0/xen/arch/x86/hvm/ioreq.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/ioreq.c 2018-03-28 13:10:55.000000000 +0000 @@ -820,6 +820,9 @@ struct hvm_ioreq_server *s; int rc; + if ( start > end ) + return -EINVAL; + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; @@ -872,6 +875,9 @@ struct hvm_ioreq_server *s; int rc; + if ( start > end ) + return -EINVAL; + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; diff -Nru xen-4.9.0/xen/arch/x86/hvm/irq.c xen-4.9.2/xen/arch/x86/hvm/irq.c --- xen-4.9.0/xen/arch/x86/hvm/irq.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/irq.c 2018-03-28 13:10:55.000000000 +0000 @@ -336,6 +336,7 @@ struct hvm_irq *hvm_irq = hvm_domain_irq(d); unsigned int gsi=0, pdev=0, pintx=0; uint8_t via_type; + struct vcpu *v; via_type = (uint8_t)MASK_EXTR(via, HVM_PARAM_CALLBACK_IRQ_TYPE_MASK) + 1; if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) || @@ -398,6 +399,10 @@ spin_unlock(&d->arch.hvm_domain.irq_lock); + for_each_vcpu ( d, v ) + if ( is_vcpu_online(v) ) + hvm_assert_evtchn_irq(v); + #ifndef NDEBUG printk(XENLOG_G_INFO "Dom%u callback via changed to ", d->domain_id); switch ( via_type ) diff -Nru xen-4.9.0/xen/arch/x86/hvm/svm/entry.S xen-4.9.2/xen/arch/x86/hvm/svm/entry.S --- xen-4.9.0/xen/arch/x86/hvm/svm/entry.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/svm/entry.S 2018-03-28 13:10:55.000000000 +0000 @@ -77,6 +77,11 @@ or $X86_EFLAGS_MBS,%rax mov %rax,VMCB_rflags(%rcx) + mov VCPU_arch_spec_ctrl(%rbx), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + pop %r15 pop %r14 pop %r13 @@ -96,25 +101,14 @@ VMRUN - GET_CURRENT(ax) - push %rdi - push %rsi - push %rdx - push %rcx - mov VCPU_svm_vmcb(%rax),%rcx - push %rax - push %r8 - push %r9 - push %r10 - push %r11 - push %rbx - mov %rax,%rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 + SAVE_ALL + + GET_CURRENT(bx) + + SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ + mov VCPU_svm_vmcb(%rbx),%rcx movb $0,VCPU_svm_vmcb_in_sync(%rbx) mov VMCB_rax(%rcx),%rax mov %rax,UREGS_rax(%rsp) diff -Nru xen-4.9.0/xen/arch/x86/hvm/svm/svm.c xen-4.9.2/xen/arch/x86/hvm/svm/svm.c --- xen-4.9.0/xen/arch/x86/hvm/svm/svm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/svm/svm.c 2018-03-28 13:10:55.000000000 +0000 @@ -574,6 +574,24 @@ if ( paging_mode_hap(v->domain) ) value &= ~X86_CR4_PAE; value |= v->arch.hvm_vcpu.guest_cr[4]; + + if ( !hvm_paging_enabled(v) ) + { + /* + * When the guest thinks paging is disabled, Xen may need to hide + * the effects of shadow paging, as hardware runs with the host + * paging settings, rather than the guests settings. + * + * Without CR0.PG, all memory accesses are user mode, so + * _PAGE_USER must be set in the shadow pagetables for guest + * userspace to function. This in turn trips up guest supervisor + * mode if SMEP/SMAP are left active in context. They wouldn't + * have any effect if paging was actually disabled, so hide them + * behind the back of the guest. + */ + value &= ~(X86_CR4_SMEP | X86_CR4_SMAP); + } + vmcb_set_cr4(vmcb, value); break; default: @@ -593,10 +611,11 @@ vmcb_set_efer(vmcb, new_efer); } -static void svm_update_guest_vendor(struct vcpu *v) +static void svm_cpuid_policy_changed(struct vcpu *v) { struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; struct vmcb_struct *vmcb = arch_svm->vmcb; + const struct cpuid_policy *cp = v->domain->arch.cpuid; u32 bitmap = vmcb_get_exception_intercepts(vmcb); if ( opt_hvm_fep || @@ -606,6 +625,10 @@ bitmap &= ~(1U << TRAP_invalid_op); vmcb_set_exception_intercepts(vmcb, bitmap); + + /* Give access to MSR_PRED_CMD if the guest has been told about it. */ + svm_intercept_msr(v, MSR_PRED_CMD, + cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW); } static void svm_sync_vmcb(struct vcpu *v) @@ -1056,7 +1079,7 @@ svm_tsc_ratio_load(v); if ( cpu_has_rdtscp ) - wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v)); + wrmsr_tsc_aux(hvm_msr_tsc_aux(v)); } static void noreturn svm_do_resume(struct vcpu *v) @@ -2045,6 +2068,13 @@ result = X86EMUL_RETRY; break; case 0: + /* + * Match up with the RDMSR side for now; ultimately this entire + * case block should go away. 
+ */ + if ( rdmsr_safe(msr, msr_content) == 0 ) + break; + goto gpf; case 1: break; default: @@ -2372,7 +2402,7 @@ .get_shadow_gs_base = svm_get_shadow_gs_base, .update_guest_cr = svm_update_guest_cr, .update_guest_efer = svm_update_guest_efer, - .update_guest_vendor = svm_update_guest_vendor, + .cpuid_policy_changed = svm_cpuid_policy_changed, .fpu_leave = svm_fpu_leave, .set_guest_pat = svm_set_guest_pat, .get_guest_pat = svm_get_guest_pat, diff -Nru xen-4.9.0/xen/arch/x86/hvm/vlapic.c xen-4.9.2/xen/arch/x86/hvm/vlapic.c --- xen-4.9.0/xen/arch/x86/hvm/vlapic.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vlapic.c 2018-03-28 13:10:55.000000000 +0000 @@ -149,6 +149,8 @@ if ( trig ) vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]); + else + vlapic_clear_vector(vec, &vlapic->regs->data[APIC_TMR]); if ( hvm_funcs.update_eoi_exit_bitmap ) hvm_funcs.update_eoi_exit_bitmap(target, vec, trig); @@ -422,7 +424,7 @@ { struct domain *d = vlapic_domain(vlapic); - if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) + if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]) ) vioapic_update_EOI(d, vector); hvm_dpci_msi_eoi(d, vector); diff -Nru xen-4.9.0/xen/arch/x86/hvm/vmx/entry.S xen-4.9.2/xen/arch/x86/hvm/vmx/entry.S --- xen-4.9.0/xen/arch/x86/hvm/vmx/entry.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vmx/entry.S 2018-03-28 13:10:55.000000000 +0000 @@ -28,27 +28,17 @@ #define VMLAUNCH .byte 0x0f,0x01,0xc2 ENTRY(vmx_asm_vmexit_handler) - push %rdi - push %rsi - push %rdx - push %rcx - push %rax + SAVE_ALL + mov %cr2,%rax - push %r8 - push %r9 - push %r10 - push %r11 - push %rbx GET_CURRENT(bx) - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 movb $1,VCPU_vmx_launched(%rbx) mov %rax,VCPU_hvm_guest_cr2(%rbx) + SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + mov %rsp,%rdi call vmx_vmexit_handler @@ -77,6 +67,12 @@ mov %rsp,%rdi call vmx_vmenter_helper + + mov VCPU_arch_spec_ctrl(%rbx), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + mov VCPU_hvm_guest_cr2(%rbx),%rax pop %r15 @@ -108,6 +104,15 @@ .Lvmx_vmentry_fail: sti SAVE_ALL + + /* + * PV variant needed here as no guest code has executed (so + * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable + * to hit (in which case the HVM variant might corrupt things). + */ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + call vmx_vmentry_failure BUG /* vmx_vmentry_failure() shouldn't return. 
*/ diff -Nru xen-4.9.0/xen/arch/x86/hvm/vmx/realmode.c xen-4.9.2/xen/arch/x86/hvm/vmx/realmode.c --- xen-4.9.0/xen/arch/x86/hvm/vmx/realmode.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vmx/realmode.c 2018-03-28 13:10:55.000000000 +0000 @@ -103,7 +103,7 @@ rc = hvm_emulate_one(hvmemul_ctxt); - if ( hvm_vcpu_io_need_completion(vio) || vio->mmio_retry ) + if ( hvm_vcpu_io_need_completion(vio) ) vio->io_completion = HVMIO_realmode_completion; if ( rc == X86EMUL_UNHANDLEABLE ) diff -Nru xen-4.9.0/xen/arch/x86/hvm/vmx/vmcs.c xen-4.9.2/xen/arch/x86/hvm/vmx/vmcs.c --- xen-4.9.0/xen/arch/x86/hvm/vmx/vmcs.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vmx/vmcs.c 2018-03-28 13:10:55.000000000 +0000 @@ -345,11 +345,19 @@ /* * "Process posted interrupt" can be set only when "virtual-interrupt - * delivery" and "acknowledge interrupt on exit" is set + * delivery" and "acknowledge interrupt on exit" is set. For the latter + * is a minimal requirement, only check the former, which is optional. */ - if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) - || !(_vmx_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT) ) - _vmx_pin_based_exec_control &= ~ PIN_BASED_POSTED_INTERRUPT; + if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ) + _vmx_pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT; + + if ( iommu_intpost && + !(_vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) ) + { + printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted " + "Interrupt is not enabled\n"); + iommu_intpost = 0; + } /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */ if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS ) diff -Nru xen-4.9.0/xen/arch/x86/hvm/vmx/vmx.c xen-4.9.2/xen/arch/x86/hvm/vmx/vmx.c --- xen-4.9.0/xen/arch/x86/hvm/vmx/vmx.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vmx/vmx.c 2018-03-28 13:10:55.000000000 +0000 @@ -71,7 +71,6 @@ static void vmx_install_vlapic_mapping(struct vcpu *v); static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); static void vmx_update_guest_efer(struct vcpu *v); -static void vmx_update_guest_vendor(struct vcpu *v); static void vmx_wbinvd_intercept(void); static void vmx_fpu_dirty_intercept(void); static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content); @@ -541,7 +540,7 @@ case MSR_GS_BASE: case MSR_SHADOW_GS_BASE: if ( !is_canonical_address(msr_content) ) - goto uncanonical_address; + return HNDL_exception_raised; if ( msr == MSR_FS_BASE ) __vmwrite(GUEST_FS_BASE, msr_content); @@ -559,14 +558,14 @@ case MSR_LSTAR: if ( !is_canonical_address(msr_content) ) - goto uncanonical_address; + return HNDL_exception_raised; v->arch.hvm_vmx.lstar = msr_content; wrmsrl(MSR_LSTAR, msr_content); break; case MSR_CSTAR: if ( !is_canonical_address(msr_content) ) - goto uncanonical_address; + return HNDL_exception_raised; v->arch.hvm_vmx.cstar = msr_content; break; @@ -580,11 +579,6 @@ } return HNDL_done; - - uncanonical_address: - HVM_DBG_LOG(DBG_LEVEL_MSR, "Not cano address of msr write %x", msr); - hvm_inject_hw_exception(TRAP_gp_fault, 0); - return HNDL_exception_raised; } /* @@ -627,7 +621,7 @@ } if ( cpu_has_rdtscp ) - wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v)); + wrmsr_tsc_aux(hvm_msr_tsc_aux(v)); } void vmx_update_cpu_exec_control(struct vcpu *v) @@ -659,8 +653,10 @@ __vmwrite(EXCEPTION_BITMAP, bitmap); } -static void vmx_update_guest_vendor(struct vcpu *v) +static void vmx_cpuid_policy_changed(struct vcpu *v) 
{ + const struct cpuid_policy *cp = v->domain->arch.cpuid; + if ( opt_hvm_fep || (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) v->arch.hvm_vmx.exception_bitmap |= (1U << TRAP_invalid_op); @@ -670,6 +666,21 @@ vmx_vmcs_enter(v); vmx_update_exception_bitmap(v); vmx_vmcs_exit(v); + + /* + * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP + * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored. + */ + if ( cp->feat.ibrsb ) + vmx_disable_intercept_for_msr(v, MSR_SPEC_CTRL, MSR_TYPE_R | MSR_TYPE_W); + else + vmx_enable_intercept_for_msr(v, MSR_SPEC_CTRL, MSR_TYPE_R | MSR_TYPE_W); + + /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */ + if ( cp->feat.ibrsb || cp->extd.ibpb ) + vmx_disable_intercept_for_msr(v, MSR_PRED_CMD, MSR_TYPE_R | MSR_TYPE_W); + else + vmx_enable_intercept_for_msr(v, MSR_PRED_CMD, MSR_TYPE_R | MSR_TYPE_W); } int vmx_guest_x86_mode(struct vcpu *v) @@ -903,7 +914,9 @@ static unsigned int __init vmx_init_msr(void) { - return (cpu_has_mpx && cpu_has_vmx_mpx) + + return 1 /* MISC_FEATURES_ENABLES */ + + !!boot_cpu_has(X86_FEATURE_IBRSB) + + (cpu_has_mpx && cpu_has_vmx_mpx) + (cpu_has_xsaves && cpu_has_vmx_xsaves); } @@ -911,6 +924,18 @@ { vmx_vmcs_enter(v); + if ( v->arch.cpuid_faulting ) + { + ctxt->msr[ctxt->count].index = MSR_INTEL_MISC_FEATURES_ENABLES; + ctxt->msr[ctxt->count++].val = MSR_MISC_FEATURES_CPUID_FAULTING; + } + + if ( v->domain->arch.cpuid->feat.ibrsb && v->arch.spec_ctrl ) + { + ctxt->msr[ctxt->count].index = MSR_SPEC_CTRL; + ctxt->msr[ctxt->count++].val = v->arch.spec_ctrl; + } + if ( cpu_has_mpx && cpu_has_vmx_mpx ) { __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); @@ -939,6 +964,23 @@ { switch ( ctxt->msr[i].index ) { + case MSR_SPEC_CTRL: + if ( !v->domain->arch.cpuid->feat.ibrsb ) + err = -ENXIO; /* MSR available? */ + /* + * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. + * ignored) when STIBP isn't enumerated in hardware. 
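
The vmx_init_msr()/vmx_save_msr() changes above add MSR_SPEC_CTRL to the migration record, and the vmx_load_msr() check that follows accepts only the architecturally defined bits. A self-contained sketch of that validation rule (the function name is illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define SPEC_CTRL_IBRS  (1ULL << 0)
    #define SPEC_CTRL_STIBP (1ULL << 1)

    /* Accept a guest MSR_SPEC_CTRL value iff the MSR is enumerated (IBRSB)
     * and no bits beyond IBRS/STIBP are set; STIBP is tolerated even when
     * not enumerated, since hardware then treats it as a no-op. */
    static bool spec_ctrl_value_ok(uint64_t val, bool guest_has_ibrsb)
    {
        if ( !guest_has_ibrsb )
            return false;

        return !(val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP));
    }
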
+ */ + else if ( ctxt->msr[i].val & + ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) + err = -ENXIO; + else + v->arch.spec_ctrl = ctxt->msr[i].val; + break; + case MSR_INTEL_MISC_FEATURES_ENABLES: + v->arch.cpuid_faulting = !!(ctxt->msr[i].val & + MSR_MISC_FEATURES_CPUID_FAULTING); + break; case MSR_IA32_BNDCFGS: if ( cpu_has_mpx && cpu_has_vmx_mpx && is_canonical_address(ctxt->msr[i].val) && @@ -2304,7 +2346,7 @@ .update_host_cr3 = vmx_update_host_cr3, .update_guest_cr = vmx_update_guest_cr, .update_guest_efer = vmx_update_guest_efer, - .update_guest_vendor = vmx_update_guest_vendor, + .cpuid_policy_changed = vmx_cpuid_policy_changed, .fpu_leave = vmx_fpu_leave, .set_guest_pat = vmx_set_guest_pat, .get_guest_pat = vmx_get_guest_pat, @@ -2433,6 +2475,7 @@ } static void __init lbr_tsx_fixup_check(void); +static void __init bdw_erratum_bdf14_fixup_check(void); const struct hvm_function_table * __init start_vmx(void) { @@ -2498,6 +2541,7 @@ setup_vmcs_dump(); lbr_tsx_fixup_check(); + bdw_erratum_bdf14_fixup_check(); return &vmx_function_table; } @@ -2790,7 +2834,11 @@ #define LBR_FROM_SIGNEXT_2MSB ((1ULL << 59) | (1ULL << 60)) +#define FIXUP_LBR_TSX (1u << 0) +#define FIXUP_BDW_ERRATUM_BDF14 (1u << 1) + static bool __read_mostly lbr_tsx_fixup_needed; +static bool __read_mostly bdw_erratum_bdf14_fixup_needed; static uint32_t __read_mostly lbr_from_start; static uint32_t __read_mostly lbr_from_end; static uint32_t __read_mostly lbr_lastint_from; @@ -2827,6 +2875,13 @@ } } +static void __init bdw_erratum_bdf14_fixup_check(void) +{ + /* Broadwell E5-2600 v4 processors need to work around erratum BDF14. */ + if ( boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 79 ) + bdw_erratum_bdf14_fixup_needed = true; +} + static int is_last_branch_msr(u32 ecx) { const struct lbr_info *lbr = last_branch_msr_get(); @@ -3086,8 +3141,11 @@ if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 ) { vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W); - v->arch.hvm_vmx.lbr_tsx_fixup_enabled = - lbr_tsx_fixup_needed; + if ( lbr_tsx_fixup_needed ) + v->arch.hvm_vmx.lbr_fixup_enabled |= FIXUP_LBR_TSX; + if ( bdw_erratum_bdf14_fixup_needed ) + v->arch.hvm_vmx.lbr_fixup_enabled |= + FIXUP_BDW_ERRATUM_BDF14; } } @@ -3100,10 +3158,11 @@ break; } case MSR_IA32_FEATURE_CONTROL: - case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS: - if ( !nvmx_msr_write_intercept(msr, msr_content) ) - goto gp_fault; - break; + case MSR_INTEL_PLATFORM_INFO: + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + /* None of these MSRs are writeable. */ + goto gp_fault; + case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7): case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(7): case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2: @@ -3114,12 +3173,6 @@ goto gp_fault; break; - case MSR_INTEL_PLATFORM_INFO: - if ( msr_content || - rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) ) - goto gp_fault; - break; - case MSR_INTEL_MISC_FEATURES_ENABLES: { bool old_cpuid_faulting = v->arch.cpuid_faulting; @@ -3152,6 +3205,13 @@ case -ERESTART: return X86EMUL_RETRY; case 0: + /* + * Match up with the RDMSR side for now; ultimately this + * entire case block should go away. 
+ */ + if ( rdmsr_safe(msr, msr_content) == 0 ) + break; + goto gp_fault; case 1: break; default: @@ -4173,6 +4233,44 @@ msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2); } +static void sign_extend_msr(u32 msr, int type) +{ + struct vmx_msr_entry *entry; + + if ( (entry = vmx_find_msr(msr, type)) != NULL ) + { + if ( entry->data & VADDR_TOP_BIT ) + entry->data |= CANONICAL_MASK; + else + entry->data &= ~CANONICAL_MASK; + } +} + +static void bdw_erratum_bdf14_fixup(void) +{ + /* + * Occasionally, on certain Broadwell CPUs MSR_IA32_LASTINTTOIP has + * been observed to have the top three bits corrupted as though the + * MSR is using the LBR_FORMAT_EIP_FLAGS_TSX format. This is + * incorrect and causes a vmentry failure -- the MSR should contain + * an offset into the current code segment. This is assumed to be + * erratum BDF14. Fix up MSR_IA32_LASTINT{FROM,TO}IP by + * sign-extending into bits 48:63. + */ + sign_extend_msr(MSR_IA32_LASTINTFROMIP, VMX_GUEST_MSR); + sign_extend_msr(MSR_IA32_LASTINTTOIP, VMX_GUEST_MSR); +} + +static void lbr_fixup(void) +{ + struct vcpu *curr = current; + + if ( curr->arch.hvm_vmx.lbr_fixup_enabled & FIXUP_LBR_TSX ) + lbr_tsx_fixup(); + if ( curr->arch.hvm_vmx.lbr_fixup_enabled & FIXUP_BDW_ERRATUM_BDF14 ) + bdw_erratum_bdf14_fixup(); +} + void vmx_vmenter_helper(const struct cpu_user_regs *regs) { struct vcpu *curr = current; @@ -4229,8 +4327,8 @@ } out: - if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) ) - lbr_tsx_fixup(); + if ( unlikely(curr->arch.hvm_vmx.lbr_fixup_enabled) ) + lbr_fixup(); HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0); diff -Nru xen-4.9.0/xen/arch/x86/hvm/vmx/vvmx.c xen-4.9.2/xen/arch/x86/hvm/vmx/vvmx.c --- xen-4.9.0/xen/arch/x86/hvm/vmx/vvmx.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/hvm/vmx/vvmx.c 2018-03-28 13:10:55.000000000 +0000 @@ -631,6 +631,7 @@ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; host_cntrl &= ~apicv_bit; + host_cntrl &= ~SECONDARY_EXEC_ENABLE_VMCS_SHADOWING; shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL); /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ @@ -2123,12 +2124,6 @@ return r; } -int nvmx_msr_write_intercept(unsigned int msr, u64 msr_content) -{ - /* silently ignore for now */ - return 1; -} - /* This function uses L2_gpa to walk the P2M page table in L1. If the * walk is successful, the translated value is returned in * L1_gpa. The result value tells what to do next. diff -Nru xen-4.9.0/xen/arch/x86/indirect-thunk.S xen-4.9.2/xen/arch/x86/indirect-thunk.S --- xen-4.9.0/xen/arch/x86/indirect-thunk.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/indirect-thunk.S 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,49 @@ +/* + * Implement __x86_indirect_thunk_* symbols for use with compatbile compilers + * and the -mindirect-branch=thunk-extern -mindirect-branch-register options. + * + * Copyright (c) 2017-2018 Citrix Systems Ltd. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + .file __FILE__ + +#include <asm/asm_defns.h> + +.macro IND_THUNK_RETPOLINE reg:req + call 2f +1: + lfence + jmp 1b +2: + mov %\reg, (%rsp) + ret +.endm + +.macro IND_THUNK_LFENCE reg:req + lfence + jmp *%\reg +.endm + +.macro IND_THUNK_JMP reg:req + jmp *%\reg +.endm + +/* + * Build the __x86.indirect_thunk.* symbols.
Execution lands on an + * alternative patch point which implements one of the above THUNK_*'s + */ +.macro GEN_INDIRECT_THUNK reg:req + .section .text.__x86_indirect_thunk_\reg, "ax", @progbits + +ENTRY(__x86_indirect_thunk_\reg) + ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ + __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ + __stringify(IND_THUNK_JMP \reg), X86_FEATURE_IND_THUNK_JMP +.endm + +/* Instantiate GEN_INDIRECT_THUNK for each register except %rsp. */ +.irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 + GEN_INDIRECT_THUNK reg=r\reg +.endr diff -Nru xen-4.9.0/xen/arch/x86/irq.c xen-4.9.2/xen/arch/x86/irq.c --- xen-4.9.0/xen/arch/x86/irq.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/irq.c 2018-03-28 13:10:55.000000000 +0000 @@ -1251,7 +1251,8 @@ return -ENOMEM; } *pinfo = info; - return 0; + + return !!err; } static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq) @@ -1294,7 +1295,10 @@ continue; err = prepare_domain_irq_pirq(d, i, i, &info); if ( err ) + { + ASSERT(err < 0); break; + } set_domain_irq_pirq(d, i, info); } @@ -1902,6 +1906,8 @@ struct pirq *info; struct irq_desc *desc; unsigned long flags; + DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {}; + DECLARE_BITMAP(granted, MAX_MSI_IRQS) = {}; ASSERT(spin_is_locked(&d->event_lock)); @@ -1935,18 +1941,24 @@ return ret; } - ret = irq_permit_access(d, irq); - if ( ret ) + if ( likely(!irq_access_permitted(d, irq)) ) { - printk(XENLOG_G_ERR - "dom%d: could not permit access to IRQ%d (pirq %d)\n", - d->domain_id, irq, pirq); - return ret; + ret = irq_permit_access(d, irq); + if ( ret ) + { + printk(XENLOG_G_ERR + "dom%d: could not permit access to IRQ%d (pirq %d)\n", + d->domain_id, irq, pirq); + return ret; + } + __set_bit(0, granted); } ret = prepare_domain_irq_pirq(d, irq, pirq, &info); - if ( ret ) + if ( ret < 0 ) goto revoke; + if ( !ret ) + __set_bit(0, prepared); desc = irq_to_desc(irq); @@ -1963,7 +1975,10 @@ if ( !cpu_has_apic ) goto done; - pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); + pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn); + if ( !pdev ) + goto done; + ret = pci_enable_msi(msi, &msi_desc); if ( ret ) { @@ -2015,14 +2030,21 @@ irq = create_irq(NUMA_NO_NODE); ret = irq >= 0 ? 
prepare_domain_irq_pirq(d, irq, pirq + nr, &info) : irq; - if ( ret ) + if ( ret < 0 ) break; + if ( !ret ) + __set_bit(nr, prepared); msi_desc[nr].irq = irq; - if ( irq_permit_access(d, irq) != 0 ) - printk(XENLOG_G_WARNING - "dom%d: could not permit access to IRQ%d (pirq %d)\n", - d->domain_id, irq, pirq); + if ( likely(!irq_access_permitted(d, irq)) ) + { + if ( irq_permit_access(d, irq) ) + printk(XENLOG_G_WARNING + "dom%d: could not permit access to IRQ%d (pirq %d)\n", + d->domain_id, irq, pirq); + else + __set_bit(nr, granted); + } desc = irq_to_desc(irq); spin_lock_irqsave(&desc->lock, flags); @@ -2049,15 +2071,16 @@ desc->msi_desc = NULL; spin_unlock_irqrestore(&desc->lock, flags); } - while ( nr-- ) + while ( nr ) { - if ( irq >= 0 && irq_deny_access(d, irq) ) + if ( irq >= 0 && test_bit(nr, granted) && + irq_deny_access(d, irq) ) printk(XENLOG_G_ERR "dom%d: could not revoke access to IRQ%d (pirq %d)\n", d->domain_id, irq, pirq); - if ( info ) + if ( info && test_bit(nr, prepared) ) cleanup_domain_irq_pirq(d, irq, info); - info = pirq_info(d, pirq + nr); + info = pirq_info(d, pirq + --nr); irq = info->arch.irq; } msi_desc->irq = -1; @@ -2073,14 +2096,16 @@ spin_lock_irqsave(&desc->lock, flags); set_domain_irq_pirq(d, irq, info); spin_unlock_irqrestore(&desc->lock, flags); + ret = 0; } done: if ( ret ) { - cleanup_domain_irq_pirq(d, irq, info); + if ( test_bit(0, prepared) ) + cleanup_domain_irq_pirq(d, irq, info); revoke: - if ( irq_deny_access(d, irq) ) + if ( test_bit(0, granted) && irq_deny_access(d, irq) ) printk(XENLOG_G_ERR "dom%d: could not revoke access to IRQ%d (pirq %d)\n", d->domain_id, irq, pirq); @@ -2129,7 +2154,8 @@ nr = msi_desc->msi.nvec; } - ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc); + ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, + msi_desc ? msi_desc->dev : NULL); if ( ret ) goto done; diff -Nru xen-4.9.0/xen/arch/x86/Kconfig xen-4.9.2/xen/arch/x86/Kconfig --- xen-4.9.0/xen/arch/x86/Kconfig 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/Kconfig 2018-03-28 13:10:55.000000000 +0000 @@ -35,6 +35,26 @@ config PV def_bool y +config PV_LINEAR_PT + bool "Support for PV linear pagetables" + depends on PV + default y + ---help--- + Linear pagetables (also called "recursive pagetables") refers + to the practice of a guest operating system having pagetable + entries pointing to other pagetables of the same level (i.e., + allowing L2 PTEs to point to other L2 pages). Some operating + systems use it as a simple way to consistently map the current + process's pagetables into its own virtual address space. + + Linux and MiniOS don't use this technique. NetBSD and Novell + Netware do; there may be other custom operating systems which + do. If you're certain you don't plan on having PV guests + which use this feature, turning it off can reduce the attack + surface. + + If unsure, say Y. 
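
Besides this build-time default, the same XSA-240 hardening wires up a runtime switch: a "pv-linear-pt" boolean_param() in xen/arch/x86/mm.c, visible further down in this diff, so a hypervisor built with PV_LINEAR_PT=y can still turn the feature off per boot. An illustrative GRUB stanza (paths and the other options are placeholders):

    multiboot2 /boot/xen.gz pv-linear-pt=0
    module2 /boot/vmlinuz root=/dev/xvda1 ro
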
+ config HVM def_bool y diff -Nru xen-4.9.0/xen/arch/x86/livepatch.c xen-4.9.2/xen/arch/x86/livepatch.c --- xen-4.9.0/xen/arch/x86/livepatch.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/livepatch.c 2018-03-28 13:10:55.000000000 +0000 @@ -151,25 +151,34 @@ const struct livepatch_elf_sec *base, const struct livepatch_elf_sec *rela) { - const Elf_RelA *r; - unsigned int symndx, i; - uint64_t val; - uint8_t *dest; + unsigned int i; for ( i = 0; i < (rela->sec->sh_size / rela->sec->sh_entsize); i++ ) { - r = rela->data + i * rela->sec->sh_entsize; + const Elf_RelA *r = rela->data + i * rela->sec->sh_entsize; + unsigned int symndx = ELF64_R_SYM(r->r_info); + uint8_t *dest = base->load_addr + r->r_offset; + uint64_t val; - symndx = ELF64_R_SYM(r->r_info); - - if ( symndx > elf->nsym ) + if ( symndx == STN_UNDEF ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: Encountered STN_UNDEF\n", + elf->name); + return -EOPNOTSUPP; + } + else if ( symndx >= elf->nsym ) { dprintk(XENLOG_ERR, LIVEPATCH "%s: Relative relocation wants symbol@%u which is past end!\n", elf->name, symndx); return -EINVAL; } + else if ( !elf->sym[symndx].sym ) + { + dprintk(XENLOG_ERR, LIVEPATCH "%s: No symbol@%u\n", + elf->name, symndx); + return -EINVAL; + } - dest = base->load_addr + r->r_offset; val = r->r_addend + elf->sym[symndx].sym->st_value; switch ( ELF64_R_TYPE(r->r_info) ) diff -Nru xen-4.9.0/xen/arch/x86/Makefile xen-4.9.2/xen/arch/x86/Makefile --- xen-4.9.0/xen/arch/x86/Makefile 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/Makefile 2018-03-28 13:10:55.000000000 +0000 @@ -35,6 +35,7 @@ obj-y += io_apic.o obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o obj-y += msi.o +obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o obj-y += ioport_emulate.o obj-y += irq.o obj-$(CONFIG_KEXEC) += machine_kexec.o @@ -55,6 +56,7 @@ obj-y += shutdown.o obj-y += smp.o obj-y += smpboot.o +obj-y += spec_ctrl.o obj-y += srat.o obj-y += string.o obj-y += sysctl.o diff -Nru xen-4.9.0/xen/arch/x86/microcode_amd.c xen-4.9.2/xen/arch/x86/microcode_amd.c --- xen-4.9.0/xen/arch/x86/microcode_amd.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/microcode_amd.c 2018-03-28 13:10:55.000000000 +0000 @@ -107,6 +107,7 @@ #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 #define F16H_MPB_MAX_SIZE 3458 +#define F17H_MPB_MAX_SIZE 3200 switch (boot_cpu_data.x86) { @@ -119,6 +120,9 @@ case 0x16: max_size = F16H_MPB_MAX_SIZE; break; + case 0x17: + max_size = F17H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; diff -Nru xen-4.9.0/xen/arch/x86/mm/hap/hap.c xen-4.9.2/xen/arch/x86/mm/hap/hap.c --- xen-4.9.0/xen/arch/x86/mm/hap/hap.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/hap/hap.c 2018-03-28 13:10:55.000000000 +0000 @@ -289,8 +289,7 @@ { d->arch.paging.hap.total_pages--; d->arch.paging.hap.p2m_pages++; - page_set_owner(pg, d); - pg->count_info |= 1; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); } else if ( !d->arch.paging.p2m_alloc_failed ) { @@ -305,21 +304,23 @@ static void hap_free_p2m_page(struct domain *d, struct page_info *pg) { + struct domain *owner = page_get_owner(pg); + /* This is called both from the p2m code (which never holds the * paging lock) and the log-dirty code (which always does). 
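
The livepatch.c hunk above tightens relocation handling in three steps, including fixing an off-by-one in the symbol-index range check. A stand-alone sketch of the same validation order (the struct shape here is assumed, not Xen's):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct sym { uint64_t st_value; bool resolved; };   /* assumed shape */

    /* Order of checks mirrors the fixed code: STN_UNDEF first, then the
     * (previously 'symndx > nsym', off-by-one) range check, then whether
     * the symbol actually resolved. */
    static int check_symndx(unsigned int symndx, unsigned int nsym,
                            const struct sym *symtab)
    {
        if ( symndx == 0 )          /* STN_UNDEF */
            return -EOPNOTSUPP;
        if ( symndx >= nsym )       /* index past the end of the table */
            return -EINVAL;
        if ( !symtab[symndx].resolved )
            return -EINVAL;

        return 0;
    }
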
*/ paging_lock_recursive(d); - ASSERT(page_get_owner(pg) == d); - /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) { - HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n", - pg, pg->count_info, pg->u.inuse.type_info); + /* Should still have no owner and count zero. */ + if ( owner || (pg->count_info & PGC_count_mask) ) + { + HAP_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", + d->domain_id, mfn_x(page_to_mfn(pg)), + owner ? owner->domain_id : DOMID_INVALID, + pg->count_info, pg->u.inuse.type_info); WARN(); + pg->count_info &= ~PGC_count_mask; + page_set_owner(pg, NULL); } - pg->count_info &= ~PGC_count_mask; - /* Free should not decrement domain's total allocation, since - * these pages were allocated without an owner. */ - page_set_owner(pg, NULL); d->arch.paging.hap.p2m_pages--; d->arch.paging.hap.total_pages++; hap_free(d, page_to_mfn(pg)); diff -Nru xen-4.9.0/xen/arch/x86/mm/p2m-pod.c xen-4.9.2/xen/arch/x86/mm/p2m-pod.c --- xen-4.9.0/xen/arch/x86/mm/p2m-pod.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/p2m-pod.c 2018-03-28 13:10:55.000000000 +0000 @@ -555,11 +555,23 @@ if ( !nonpod ) { - /* All PoD: Mark the whole region invalid and tell caller * we're done. */ - p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid, - p2m->default_access); - p2m->pod.entry_count-=(1<<order); + /* + * All PoD: Mark the whole region invalid and tell caller + * we're done. + */ + if ( p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid, + p2m->default_access) ) + { + /* + * If this fails, we can't tell how much of the range was changed. + * Best to crash the domain unless we're sure a partial change is + * impossible. + */ + if ( order != 0 ) + domain_crash(d); + goto out_unlock; + } + p2m->pod.entry_count -= 1UL << order; BUG_ON(p2m->pod.entry_count < 0); ret = 1; goto out_entry_check; @@ -600,8 +612,14 @@ n = 1UL << cur_order; if ( t == p2m_populate_on_demand ) { - p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, - p2m_invalid, p2m->default_access); + /* This shouldn't be able to fail */ + if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, + p2m_invalid, p2m->default_access) ) + { + ASSERT_UNREACHABLE(); + domain_crash(d); + goto out_unlock; + } p2m->pod.entry_count -= n; BUG_ON(p2m->pod.entry_count < 0); pod -= n; @@ -622,8 +640,14 @@ page = mfn_to_page(mfn); - p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, - p2m_invalid, p2m->default_access); + /* This shouldn't be able to fail */ + if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, + p2m_invalid, p2m->default_access) ) + { + ASSERT_UNREACHABLE(); + domain_crash(d); + goto out_unlock; + } p2m_tlb_flush_sync(p2m); for ( j = 0; j < n; ++j ) set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY); @@ -752,8 +776,10 @@ } /* Try to remove the page, restoring old mapping if it fails. */ - p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_2M, - p2m_populate_on_demand, p2m->default_access); + if ( p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_2M, + p2m_populate_on_demand, p2m->default_access) ) + goto out; + p2m_tlb_flush_sync(p2m); /* Make none of the MFNs are used elsewhere... for example, mapped @@ -810,9 +836,18 @@ ret = SUPERPAGE_PAGES; out_reset: - if ( reset ) - p2m_set_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access); - + /* + * This p2m_set_entry() call shouldn't be able to fail, since the same order + * on the same gfn succeeded above. If that turns out to be false, crashing + * the domain should be the safest way of making sure we don't leak memory.
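
All of the p2m-pod.c hunks above and below follow the same XSA-247 discipline: a failing p2m_set_entry() is no longer ignored, and when a partial multi-page update cannot be ruled out the domain is crashed rather than risking leaked or still-mapped pages. Condensed into one helper for illustration (the function name is hypothetical; the calls and types are this file's own):

    /* Sketch of the recurring XSA-247 error-handling idiom. */
    static int pod_zap_or_crash(struct p2m_domain *p2m, unsigned long gfn,
                                unsigned int order)
    {
        if ( p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
                           p2m->default_access) )
        {
            /* Partial success can't be ruled out for order > 0: a leak or
             * a still-reachable mapping is worse than killing the guest. */
            if ( order != 0 )
                domain_crash(p2m->domain);
            return -1;
        }

        return 0;
    }
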
+ */ + if ( reset && p2m_set_entry(p2m, gfn, mfn0, PAGE_ORDER_2M, + type0, p2m->default_access) ) + { + ASSERT_UNREACHABLE(); + domain_crash(d); + } + out: gfn_unlock(p2m, gfn, SUPERPAGE_ORDER); return ret; @@ -869,19 +904,30 @@ } /* Try to remove the page, restoring old mapping if it fails. */ - p2m_set_entry(p2m, gfns[i], INVALID_MFN, PAGE_ORDER_4K, - p2m_populate_on_demand, p2m->default_access); + if ( p2m_set_entry(p2m, gfns[i], INVALID_MFN, PAGE_ORDER_4K, + p2m_populate_on_demand, p2m->default_access) ) + goto skip; /* See if the page was successfully unmapped. (Allow one refcount * for being allocated to a domain.) */ if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 ) { + /* + * If the previous p2m_set_entry call succeeded, this one shouldn't + * be able to fail. If it does, crashing the domain should be safe. + */ + if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, + types[i], p2m->default_access) ) + { + ASSERT_UNREACHABLE(); + domain_crash(d); + goto out_unmap; + } + + skip: unmap_domain_page(map[i]); map[i] = NULL; - p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, - types[i], p2m->default_access); - continue; } } @@ -900,12 +946,25 @@ unmap_domain_page(map[i]); - /* See comment in p2m_pod_zero_check_superpage() re gnttab - * check timing. */ - if ( j < PAGE_SIZE/sizeof(*map[i]) ) + map[i] = NULL; + + /* + * See comment in p2m_pod_zero_check_superpage() re gnttab + * check timing. + */ + if ( j < (PAGE_SIZE / sizeof(*map[i])) ) { - p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, - types[i], p2m->default_access); + /* + * If the previous p2m_set_entry call succeeded, this one shouldn't + * be able to fail. If it does, crashing the domain should be safe. + */ + if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, + types[i], p2m->default_access) ) + { + ASSERT_UNREACHABLE(); + domain_crash(d); + goto out_unmap; + } } else { @@ -929,7 +988,17 @@ p2m->pod.entry_count++; } } - + + return; + +out_unmap: + /* + * Something went wrong, probably crashing the domain. Unmap + * everything and return. + */ + for ( i = 0; i < count; i++ ) + if ( map[i] ) + unmap_domain_page(map[i]); } #define POD_SWEEP_LIMIT 1024 @@ -1071,9 +1140,8 @@ * NOTE: In a fine-grained p2m locking scenario this operation * may need to promote its locking from gfn->1g superpage */ - p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_2M, - p2m_populate_on_demand, p2m->default_access); - return 0; + return p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_2M, + p2m_populate_on_demand, p2m->default_access); } /* Only reclaim if we're in actual need of more cache. */ @@ -1104,8 +1172,12 @@ gfn_aligned = (gfn >> order) << order; - p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, - p2m->default_access); + if ( p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, + p2m->default_access) ) + { + p2m_pod_cache_add(p2m, p, order); + goto out_fail; + } for( i = 0; i < (1UL << order); i++ ) { @@ -1150,13 +1222,18 @@ BUG_ON(order != PAGE_ORDER_2M); pod_unlock(p2m); - /* Remap this 2-meg region in singleton chunks */ - /* NOTE: In a p2m fine-grained lock scenario this might - * need promoting the gfn lock from gfn->2M superpage */ + /* + * Remap this 2-meg region in singleton chunks. See the comment on the + * 1G page splitting path above for why a single call suffices. + * + * NOTE: In a p2m fine-grained lock scenario this might + * need promoting the gfn lock from gfn->2M superpage. 
+ */ gfn_aligned = (gfn>>order)<<order; - p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_4K, - p2m_populate_on_demand, p2m->default_access); + if ( p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_4K, + p2m_populate_on_demand, p2m->default_access) ) + return -1; + if ( tb_init_done ) { struct { diff -Nru xen-4.9.0/xen/arch/x86/mm/paging.c xen-4.9.2/xen/arch/x86/mm/paging.c --- xen-4.9.0/xen/arch/x86/mm/paging.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/paging.c 2018-03-28 13:10:55.000000000 +0000 @@ -274,7 +274,7 @@ return; /* Shared MFNs should NEVER be marked dirty */ - BUG_ON(SHARED_M2P(pfn_x(pfn))); + BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn_x(pfn))); /* * Values with the MSB set denote MFNs that aren't really part of the diff -Nru xen-4.9.0/xen/arch/x86/mm/shadow/common.c xen-4.9.2/xen/arch/x86/mm/shadow/common.c --- xen-4.9.0/xen/arch/x86/mm/shadow/common.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/shadow/common.c 2018-03-28 13:10:55.000000000 +0000 @@ -1464,7 +1464,7 @@ * TLBs when we reuse the page. Because the destructors leave the * contents of the pages in place, we can delay TLB flushes until * just before the allocator hands the page out again. */ - sp->tlbflush_timestamp = tlbflush_current_time(); + page_set_tlbflush_timestamp(sp); perfc_decr(shadow_alloc_count); page_list_add_tail(sp, &d->arch.paging.shadow.freelist); sp = next; @@ -1503,32 +1503,29 @@ pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); d->arch.paging.shadow.p2m_pages++; d->arch.paging.shadow.total_pages--; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); paging_unlock(d); - /* Unlike shadow pages, mark p2m pages as owned by the domain. - * Marking the domain as the owner would normally allow the guest to - * create mappings of these pages, but these p2m pages will never be - * in the domain's guest-physical address space, and so that is not - * believed to be a concern. */ - page_set_owner(pg, d); - pg->count_info |= 1; return pg; } static void shadow_free_p2m_page(struct domain *d, struct page_info *pg) { - ASSERT(page_get_owner(pg) == d); - /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) + struct domain *owner = page_get_owner(pg); + + /* Should still have no owner and count zero. */ + if ( owner || (pg->count_info & PGC_count_mask) ) { - SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n", + SHADOW_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", + d->domain_id, mfn_x(page_to_mfn(pg)), + owner ? owner->domain_id : DOMID_INVALID, pg->count_info, pg->u.inuse.type_info); + pg->count_info &= ~PGC_count_mask; + page_set_owner(pg, NULL); } - pg->count_info &= ~PGC_count_mask; pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */ - page_set_owner(pg, NULL); /* This is called both from the p2m code (which never holds the * paging lock) and the log-dirty code (which always does).
*/ @@ -3132,7 +3129,9 @@ e = __map_domain_page(pg); write_32bit_pse_identmap(e); unmap_domain_page(e); + pg->count_info = 1; pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated; + page_set_owner(pg, d); } paging_lock(d); @@ -3170,7 +3169,11 @@ if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) p2m_teardown(p2m); if ( rv != 0 && pg != NULL ) + { + pg->count_info &= ~PGC_count_mask; + page_set_owner(pg, NULL); shadow_free_p2m_page(d, pg); + } domain_unpause(d); return rv; } @@ -3279,7 +3282,22 @@ /* Must be called outside the lock */ if ( unpaged_pagetable ) + { + if ( page_get_owner(unpaged_pagetable) == d && + (unpaged_pagetable->count_info & PGC_count_mask) == 1 ) + { + unpaged_pagetable->count_info &= ~PGC_count_mask; + page_set_owner(unpaged_pagetable, NULL); + } + /* Complain here in cases where shadow_free_p2m_page() won't. */ + else if ( !page_get_owner(unpaged_pagetable) && + !(unpaged_pagetable->count_info & PGC_count_mask) ) + SHADOW_ERROR("d%d: Odd unpaged pt %"PRI_mfn" c=%lx t=%"PRtype_info"\n", + d->domain_id, mfn_x(page_to_mfn(unpaged_pagetable)), + unpaged_pagetable->count_info, + unpaged_pagetable->u.inuse.type_info); shadow_free_p2m_page(d, unpaged_pagetable); + } } void shadow_final_teardown(struct domain *d) diff -Nru xen-4.9.0/xen/arch/x86/mm/shadow/multi.c xen-4.9.2/xen/arch/x86/mm/shadow/multi.c --- xen-4.9.0/xen/arch/x86/mm/shadow/multi.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/shadow/multi.c 2018-03-28 13:10:55.000000000 +0000 @@ -922,7 +922,7 @@ shadow_l4e_t new_sl4e, mfn_t sl4mfn) { - int flags = 0, ok; + int flags = 0; shadow_l4e_t old_sl4e; paddr_t paddr; ASSERT(sl4e != NULL); @@ -937,15 +937,16 @@ { /* About to install a new reference */ mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e); - ok = sh_get_ref(d, sl3mfn, paddr); - /* Are we pinning l3 shadows to handle wierd linux behaviour? */ - if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) - ok |= sh_pin(d, sl3mfn); - if ( !ok ) + + if ( !sh_get_ref(d, sl3mfn, paddr) ) { domain_crash(d); return SHADOW_SET_ERROR; } + + /* Are we pinning l3 shadows to handle weird Linux behaviour? */ + if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) + sh_pin(d, sl3mfn); } /* Write the new entry */ @@ -1484,26 +1485,38 @@ sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty(); } - /* Shadow linear mapping for 4-level shadows. N.B. for 3-level - * shadows on 64-bit xen, this linear mapping is later replaced by the - * monitor pagetable structure, which is built in make_monitor_table - * and maintained by sh_update_linear_entries. */ - sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = - shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); - - /* Self linear mapping. */ - if ( shadow_mode_translate(d) && !shadow_mode_external(d) ) + /* + * Linear mapping slots: + * + * Calling this function with gl4mfn == sl4mfn is used to construct a + * monitor table for translated domains. In this case, gl4mfn forms the + * self-linear mapping (i.e. not pointing into the translated domain), and + * the shadow-linear slot is skipped. The shadow-linear slot is either + * filled when constructing lower level monitor tables, or via + * sh_update_cr3() for 4-level guests. + * + * Calling this function with gl4mfn != sl4mfn is used for non-translated + * guests, where the shadow-linear slot is actually self-linear, and the + * guest-linear slot points into the guests view of its pagetables. 
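
One way to summarise the L4 slot handling that the rewritten code below implements:

    guest type                   gl4mfn vs sl4mfn   SH_LINEAR slot          LINEAR slot
    translated (monitor table)   equal              empty, filled later     gl4mfn (self-linear)
    non-translated (PV)          different          sl4mfn (self-linear)    gl4mfn (guest's view)
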
+ */ + if ( shadow_mode_translate(d) ) { - // linear tables may not be used with translated PV guests - sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = + ASSERT(mfn_eq(gl4mfn, sl4mfn)); + + sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = shadow_l4e_empty(); } else { - sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = - shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); + ASSERT(!mfn_eq(gl4mfn, sl4mfn)); + + sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = + shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); } + sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = + shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); + unmap_domain_page(sl4e); } #endif @@ -4018,14 +4031,15 @@ /* Take a ref to this page: it will be released in sh_detach_old_tables() * or the next call to set_toplevel_shadow() */ - if ( !sh_get_ref(d, smfn, 0) ) + if ( sh_get_ref(d, smfn, 0) ) + new_entry = pagetable_from_mfn(smfn); + else { SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn)); domain_crash(d); + new_entry = pagetable_null(); } - new_entry = pagetable_from_mfn(smfn); - install_new_entry: /* Done. Install it */ SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", @@ -4403,6 +4417,18 @@ /* Carefully look in the shadow linear map for the l1e we expect */ #if SHADOW_PAGING_LEVELS >= 4 + /* + * Non-external guests (i.e. PV) have a SHADOW_LINEAR mapping from the + * moment their shadows are created. External guests (i.e. HVM) may not, + * but always have a regular linear mapping, which we can use to observe + * whether a SHADOW_LINEAR mapping is present. + */ + if ( paging_mode_external(d) ) + { + sl4p = __linear_l4_table + l4_linear_offset(SH_LINEAR_PT_VIRT_START); + if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) + return 0; + } sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) return 0; diff -Nru xen-4.9.0/xen/arch/x86/mm/shadow/private.h xen-4.9.2/xen/arch/x86/mm/shadow/private.h --- xen-4.9.0/xen/arch/x86/mm/shadow/private.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm/shadow/private.h 2018-03-28 13:10:55.000000000 +0000 @@ -530,7 +530,7 @@ x = sp->u.sh.count; nx = x + 1; - if ( unlikely(nx >= 1U<<26) ) + if ( unlikely(nx >= (1U << PAGE_SH_REFCOUNT_WIDTH)) ) { SHADOW_PRINTK("shadow ref overflow, gmfn=%lx smfn=%lx\n", __backpointer(sp), mfn_x(smfn)); diff -Nru xen-4.9.0/xen/arch/x86/mm.c xen-4.9.2/xen/arch/x86/mm.c --- xen-4.9.0/xen/arch/x86/mm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/mm.c 2018-03-28 13:10:55.000000000 +0000 @@ -595,7 +595,7 @@ void *eff_l1e) { bool_t user_mode = !(v->arch.flags & TF_kernel_mode); -#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_pt(v) TOGGLE_MODE(); guest_get_eff_l1e(addr, eff_l1e); @@ -747,6 +747,63 @@ put_page(page); } +#ifdef CONFIG_PV_LINEAR_PT + +static bool inc_linear_entries(struct page_info *pg) +{ + typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; + + do { + /* + * The check below checks for the "linear use" count being non-zero + * as well as overflow. Signed integer overflow is undefined behavior + * according to the C spec. However, as long as linear_pt_count is + * smaller in size than 'int', the arithmetic operation of the + * increment below won't overflow; rather the result will be truncated + * when stored. Ensure that this is always true. 
+ */ + BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); + oc = nc++; + if ( nc <= 0 ) + return false; + nc = cmpxchg(&pg->linear_pt_count, oc, nc); + } while ( oc != nc ); + + return true; +} + +static void dec_linear_entries(struct page_info *pg) +{ + typeof(pg->linear_pt_count) oc; + + oc = arch_fetch_and_add(&pg->linear_pt_count, -1); + ASSERT(oc > 0); +} + +static bool inc_linear_uses(struct page_info *pg) +{ + typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; + + do { + /* See the respective comment in inc_linear_entries(). */ + BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); + oc = nc--; + if ( nc >= 0 ) + return false; + nc = cmpxchg(&pg->linear_pt_count, oc, nc); + } while ( oc != nc ); + + return true; +} + +static void dec_linear_uses(struct page_info *pg) +{ + typeof(pg->linear_pt_count) oc; + + oc = arch_fetch_and_add(&pg->linear_pt_count, 1); + ASSERT(oc < 0); +} + /* * We allow root tables to map each other (a.k.a. linear page tables). It * needs some special care with reference counts and access permissions: @@ -759,6 +816,9 @@ * frame if it is mapped by a different root table. This is sufficient and * also necessary to allow validation of a root table mapping itself. */ +static bool __read_mostly opt_pv_linear_pt = true; +boolean_param("pv-linear-pt", opt_pv_linear_pt); + #define define_get_linear_pagetable(level) \ static int \ get_##level##_linear_pagetable( \ level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \ { \ struct page_info *page; \ unsigned long pfn; \ \ + if ( !opt_pv_linear_pt ) \ + { \ + gdprintk(XENLOG_WARNING, \ + "Attempt to create linear p.t. (feature disabled)\n"); \ + return 0; \ + } \ + \ if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ { \ gdprintk(XENLOG_WARNING, \ @@ -777,15 +844,35 @@ \ if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ { \ + struct page_info *ptpg = mfn_to_page(pde_pfn); \ + \ + /* Make sure the page table belongs to the correct domain. */ \ + if ( unlikely(page_get_owner(ptpg) != d) ) \ + return 0; \ + \ /* Make sure the mapped frame belongs to the correct domain. */ \ if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \ return 0; \ \ /* \ - * Ensure that the mapped frame is an already-validated page table. \ + * Ensure that the mapped frame is an already-validated page table \ + * and is not itself having linear entries, as well as that the \ + * containing page table is not itself in use as a linear page table \ + * elsewhere. \ + * If so, atomically increment the count (checking for overflow).
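
The inc_linear_entries()/inc_linear_uses() pair above implements a lock-free, sign-partitioned counter: positive values count linear entries contained in a page table, negative values count uses of the page *as* a linear page table, and the two states exclude each other. A self-contained rendering with C11 atomics standing in for Xen's read_atomic()/cmpxchg() (assumption: the real field is narrower than int, as the BUILD_BUG_ON enforces):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* > 0: linear entries in this page table; < 0: uses as a linear pt. */
    static bool inc_linear_entries_sketch(_Atomic short *linear_pt_count)
    {
        short oc = atomic_load(linear_pt_count), nc;

        do {
            nc = oc + 1;
            if ( nc <= 0 )   /* already in "linear use" state, or overflow */
                return false;
        } while ( !atomic_compare_exchange_weak(linear_pt_count, &oc, nc) );

        return true;
    }
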
\ */ \ page = mfn_to_page(pfn); \ + if ( !inc_linear_entries(ptpg) ) \ + { \ + put_page(page); \ + return 0; \ + } \ + if ( !inc_linear_uses(page) ) \ + { \ + dec_linear_entries(ptpg); \ + put_page(page); \ + return 0; \ + } \ y = page->u.inuse.type_info; \ do { \ x = y; \ @@ -793,6 +880,8 @@ unlikely((x & (PGT_type_mask|PGT_validated)) != \ (PGT_##level##_page_table|PGT_validated)) ) \ { \ + dec_linear_uses(page); \ + dec_linear_entries(ptpg); \ put_page(page); \ return 0; \ } \ @@ -803,6 +892,27 @@ return 1; \ } +#else /* CONFIG_PV_LINEAR_PT */ + +#define define_get_linear_pagetable(level) \ +static int \ +get_##level##_linear_pagetable( \ + level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \ +{ \ + return 0; \ +} + +static void dec_linear_uses(struct page_info *pg) +{ + ASSERT(pg->linear_pt_count == 0); +} + +static void dec_linear_entries(struct page_info *pg) +{ + ASSERT(pg->linear_pt_count == 0); +} + +#endif /* CONFIG_PV_LINEAR_PT */ bool is_iomem_page(mfn_t mfn) { @@ -1212,11 +1322,23 @@ _PAGE_USER|_PAGE_RW); \ } while ( 0 ) +/* + * When shadowing an L4 behind the guests back (e.g. for per-pcpu + * purposes), we cannot efficiently sync access bit updates from hardware + * (on the shadow tables) back into the guest view. + * + * We therefore unconditionally set _PAGE_ACCESSED even in the guests + * view. This will appear to the guest as a CPU which proactively pulls + * all valid L4e's into its TLB, which is compatible with the x86 ABI. + * + * At the time of writing, all PV guests set the access bit anyway, so + * this is no actual change in their behaviour. + */ #define adjust_guest_l4e(pl4e, d) \ do { \ if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \ likely(!is_pv_32bit_domain(d)) ) \ - l4e_add_flags((pl4e), _PAGE_USER); \ + l4e_add_flags((pl4e), _PAGE_USER | _PAGE_ACCESSED); \ } while ( 0 ) #define unadjust_guest_l3e(pl3e, d) \ @@ -1226,6 +1348,9 @@ l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ } while ( 0 ) +static int _put_page_type(struct page_info *page, bool preemptible, + struct page_info *ptpg); + void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) { unsigned long pfn = l1e_get_pfn(l1e); @@ -1296,17 +1421,22 @@ if ( l2e_get_flags(l2e) & _PAGE_PSE ) put_superpage(l2e_get_pfn(l2e)); else - put_page_and_type(l2e_get_page(l2e)); + { + struct page_info *pg = l2e_get_page(l2e); + int rc = _put_page_type(pg, false, mfn_to_page(pfn)); + + ASSERT(!rc); + put_page(pg); + } return 0; } -static int __put_page_type(struct page_info *, int preemptible); - static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, int partial, bool_t defer) { struct page_info *pg; + int rc; if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) return 1; @@ -1329,21 +1459,28 @@ if ( unlikely(partial > 0) ) { ASSERT(!defer); - return __put_page_type(pg, 1); + return _put_page_type(pg, true, mfn_to_page(pfn)); } if ( defer ) { + current->arch.old_guest_ptpg = mfn_to_page(pfn); current->arch.old_guest_table = pg; return 0; } - return put_page_and_type_preemptible(pg); + rc = _put_page_type(pg, true, mfn_to_page(pfn)); + if ( likely(!rc) ) + put_page(pg); + + return rc; } static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, int partial, bool_t defer) { + int rc = 1; + if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && (l4e_get_pfn(l4e) != pfn) ) { @@ -1352,18 +1489,22 @@ if ( unlikely(partial > 0) ) { ASSERT(!defer); - return __put_page_type(pg, 1); + return _put_page_type(pg, true, mfn_to_page(pfn)); } if ( defer ) { + 
current->arch.old_guest_ptpg = mfn_to_page(pfn); current->arch.old_guest_table = pg; return 0; } - return put_page_and_type_preemptible(pg); + rc = _put_page_type(pg, true, mfn_to_page(pfn)); + if ( likely(!rc) ) + put_page(pg); } - return 1; + + return rc; } static int alloc_l1_table(struct page_info *page) @@ -1561,6 +1702,7 @@ { page->nr_validated_ptes = i; page->partial_pte = 0; + current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } while ( i-- > 0 ) @@ -1654,6 +1796,7 @@ { if ( current->arch.old_guest_table ) page->nr_validated_ptes++; + current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } } @@ -1825,7 +1968,11 @@ do { x = y; + ASSERT((x & PGT_count_mask) && (x & PGT_locked)); + nx = x - (1 | PGT_locked); + /* We must not drop the last reference here. */ + ASSERT(nx & PGT_count_mask); } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); } @@ -1911,7 +2058,6 @@ if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { - /* Translate foreign guest addresses. */ struct page_info *page = NULL; if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) ) @@ -1921,9 +2067,35 @@ return -EINVAL; } + /* Translate foreign guest address. */ if ( paging_mode_translate(pg_dom) ) { - page = get_page_from_gfn(pg_dom, l1e_get_pfn(nl1e), NULL, P2M_ALLOC); + p2m_type_t p2mt; + p2m_query_t q = l1e_get_flags(nl1e) & _PAGE_RW ? + P2M_ALLOC | P2M_UNSHARE : P2M_ALLOC; + + page = get_page_from_gfn(pg_dom, l1e_get_pfn(nl1e), &p2mt, q); + + if ( p2m_is_paged(p2mt) ) + { + if ( page ) + put_page(page); + p2m_mem_paging_populate(pg_dom, l1e_get_pfn(nl1e)); + return -ENOENT; + } + + if ( p2mt == p2m_ram_paging_in && !page ) + return -ENOENT; + + /* Did our attempt to unshare fail? */ + if ( (q & P2M_UNSHARE) && p2m_is_shared(p2mt) ) + { + /* We could not have obtained a page ref. */ + ASSERT(!page); + /* And mem_sharing_notify has already been called. */ + return -ENOMEM; + } + if ( !page ) return -EINVAL; nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(nl1e)); @@ -2403,14 +2575,20 @@ } -static int __put_final_page_type( - struct page_info *page, unsigned long type, int preemptible) +static int _put_final_page_type(struct page_info *page, unsigned long type, + bool preemptible, struct page_info *ptpg) { int rc = free_page_type(page, type, preemptible); /* No need for atomic update of type_info here: noone else updates it. */ if ( rc == 0 ) { + if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) + { + dec_linear_uses(page); + dec_linear_entries(ptpg); + } + ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); /* * Record TLB information for flush later. 
We do not stamp page tables * when running in shadow mode: @@ -2420,7 +2598,7 @@ */ if ( !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) - page->tlbflush_timestamp = tlbflush_current_time(); + page_set_tlbflush_timestamp(page); wmb(); page->u.inuse.type_info--; } @@ -2428,9 +2606,6 @@ { ASSERT((page->u.inuse.type_info & (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) - page->tlbflush_timestamp = tlbflush_current_time(); wmb(); page->u.inuse.type_info |= PGT_validated; } @@ -2446,8 +2621,8 @@ } -static int __put_page_type(struct page_info *page, - int preemptible) +static int _put_page_type(struct page_info *page, bool preemptible, + struct page_info *ptpg) { unsigned long nx, x, y = page->u.inuse.type_info; int rc = 0; @@ -2474,7 +2649,8 @@ x, nx)) != x) ) continue; /* We cleared the 'valid bit' so we do the clean up. */ - rc = __put_final_page_type(page, x, preemptible); + rc = _put_final_page_type(page, x, preemptible, ptpg); + ptpg = NULL; if ( x & PGT_partial ) put_page(page); break; @@ -2486,10 +2662,28 @@ * 1. Pointless, since it's the shadow pt's which must be tracked. * 2. Shadow mode reuses this field for shadowed page tables to * store flags info -- we don't want to conflict with that. + * Also page_set_tlbflush_timestamp() accesses the same union + * linear_pt_count lives in. Pages (including page table ones), + * however, don't need their flush time stamp set except when + * the last reference is being dropped. For page table pages + * this happens in _put_final_page_type(). */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) - page->tlbflush_timestamp = tlbflush_current_time(); + if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) + BUG_ON(!IS_ENABLED(CONFIG_PV_LINEAR_PT)); + else if ( !(shadow_mode_enabled(page_get_owner(page)) && + (page->count_info & PGC_page_table)) ) + page_set_tlbflush_timestamp(page); + } + else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == + (PGT_locked | 1)) ) + { + /* + * We must not drop the second to last reference when the page is + * locked, as page_unlock() doesn't do any cleanup of the type. 
+ */ + cpu_relax(); + y = page->u.inuse.type_info; + continue; } if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) @@ -2499,6 +2693,13 @@ return -EINTR; } + if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) + { + ASSERT(!rc); + dec_linear_uses(page); + dec_linear_entries(ptpg); + } + return rc; } @@ -2638,6 +2839,7 @@ page->nr_validated_ptes = 0; page->partial_pte = 0; } + page->linear_pt_count = 0; rc = alloc_page_type(page, type, preemptible); } @@ -2652,7 +2854,7 @@ void put_page_type(struct page_info *page) { - int rc = __put_page_type(page, 0); + int rc = _put_page_type(page, false, NULL); ASSERT(rc == 0); (void)rc; } @@ -2668,7 +2870,7 @@ int put_page_type_preemptible(struct page_info *page) { - return __put_page_type(page, 1); + return _put_page_type(page, true, NULL); } int get_page_type_preemptible(struct page_info *page, unsigned long type) @@ -2878,11 +3080,14 @@ if ( !v->arch.old_guest_table ) return 0; - switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) + switch ( rc = _put_page_type(v->arch.old_guest_table, true, + v->arch.old_guest_ptpg) ) { case -EINTR: case -ERESTART: return -ERESTART; + case 0: + put_page(v->arch.old_guest_table); } v->arch.old_guest_table = NULL; @@ -3042,6 +3247,7 @@ rc = -ERESTART; /* fallthrough */ case -ERESTART: + curr->arch.old_guest_ptpg = NULL; curr->arch.old_guest_table = page; break; default: @@ -3310,7 +3516,10 @@ if ( type == PGT_l1_page_table ) put_page_and_type(page); else + { + curr->arch.old_guest_ptpg = NULL; curr->arch.old_guest_table = page; + } } } @@ -3346,6 +3555,7 @@ { case -EINTR: case -ERESTART: + curr->arch.old_guest_ptpg = NULL; curr->arch.old_guest_table = page; rc = 0; break; @@ -3425,6 +3635,7 @@ rc = -ERESTART; /* fallthrough */ case -ERESTART: + curr->arch.old_guest_ptpg = NULL; curr->arch.old_guest_table = page; break; default: @@ -3692,6 +3903,7 @@ struct vcpu *curr = current, *v = curr; struct domain *d = v->domain, *pt_owner = d, *pg_owner; struct domain_mmap_cache mapcache; + bool sync_guest = false; uint32_t xsm_needed = 0; uint32_t xsm_checked = 0; int rc = put_old_guest_table(curr); @@ -3803,7 +4015,7 @@ if ( p2m_is_paged(p2mt) ) { ASSERT(!page); - p2m_mem_paging_populate(pg_owner, gmfn); + p2m_mem_paging_populate(pt_owner, gmfn); rc = -ENOENT; break; } @@ -3825,47 +4037,10 @@ switch ( page->u.inuse.type_info & PGT_type_mask ) { case PGT_l1_page_table: - { - l1_pgentry_t l1e = l1e_from_intpte(req.val); - p2m_type_t l1e_p2mt = p2m_ram_rw; - struct page_info *target = NULL; - p2m_query_t q = (l1e_get_flags(l1e) & _PAGE_RW) ? - P2M_UNSHARE : P2M_ALLOC; - - if ( paging_mode_translate(pg_owner) ) - target = get_page_from_gfn(pg_owner, l1e_get_pfn(l1e), - &l1e_p2mt, q); - - if ( p2m_is_paged(l1e_p2mt) ) - { - if ( target ) - put_page(target); - p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e)); - rc = -ENOENT; - break; - } - else if ( p2m_ram_paging_in == l1e_p2mt && !target ) - { - rc = -ENOENT; - break; - } - /* If we tried to unshare and failed */ - else if ( (q & P2M_UNSHARE) && p2m_is_shared(l1e_p2mt) ) - { - /* We could not have obtained a page ref. */ - ASSERT(target == NULL); - /* And mem_sharing_notify has already been called. 
*/ - rc = -ENOMEM; - break; - } - - rc = mod_l1_entry(va, l1e, mfn, + rc = mod_l1_entry(va, l1e_from_intpte(req.val), mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, v, pg_owner); - if ( target ) - put_page(target); - } - break; + break; case PGT_l2_page_table: rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, v); @@ -3877,7 +4052,19 @@ case PGT_l4_page_table: rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, v); - break; + /* + * No need to sync if all uses of the page can be accounted + * to the page lock we hold, its pinned status, and uses on + * this (v)CPU. + */ + if ( !rc && this_cpu(root_pgt) && + ((page->u.inuse.type_info & PGT_count_mask) > + (1 + !!(page->u.inuse.type_info & PGT_pinned) + + (pagetable_get_pfn(curr->arch.guest_table) == mfn) + + (pagetable_get_pfn(curr->arch.guest_table_user) == + mfn))) ) + sync_guest = true; + break; case PGT_writable_page: perfc_incr(writable_mmu_updates); if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) ) @@ -3979,6 +4166,20 @@ domain_mmap_cache_destroy(&mapcache); + if ( sync_guest ) + { + /* + * Force other vCPU-s of the affected guest to pick up L4 entry + * changes (if any). + */ + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = per_cpu(scratch_cpumask, cpu); + + cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu)); + if ( !cpumask_empty(mask) ) + flush_mask(mask, FLUSH_TLB_GLOBAL); + } + perfc_add(num_page_updates, i); out: @@ -4006,6 +4207,9 @@ l1_pgentry_t ol1e; struct domain *d = v->domain; + if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) ) + return GNTST_general_error; + adjust_guest_l1e(nl1e, d); gmfn = pte_addr >> PAGE_SHIFT; @@ -4055,7 +4259,8 @@ } static int destroy_grant_pte_mapping( - uint64_t addr, unsigned long frame, struct domain *d) + uint64_t addr, unsigned long frame, unsigned int grant_pte_flags, + struct domain *d) { int rc = GNTST_okay; void *va; @@ -4063,6 +4268,16 @@ struct page_info *page; l1_pgentry_t ol1e; + /* + * addr comes from Xen's active_entry tracking so isn't guest controlled, + * but it had still better be PTE-aligned. + */ + if ( !IS_ALIGNED(addr, sizeof(ol1e)) ) + { + ASSERT_UNREACHABLE(); + return GNTST_general_error; + } + gmfn = addr >> PAGE_SHIFT; page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); @@ -4091,17 +4306,29 @@ ol1e = *(l1_pgentry_t *)va; - /* Check that the virtual address supplied is actually mapped to frame. */ - if ( unlikely(l1e_get_pfn(ol1e) != frame) ) + /* + * Check that the PTE supplied actually maps frame (with appropriate + * permissions). + */ + if ( unlikely(l1e_get_pfn(ol1e) != frame) || + unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & + (_PAGE_PRESENT | _PAGE_RW)) ) { page_unlock(page); - gdprintk(XENLOG_WARNING, - "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n", - l1e_get_intpte(ol1e), addr, frame); + gdprintk(XENLOG_ERR, + "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n", + l1e_get_intpte(ol1e), addr, + l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); rc = GNTST_general_error; goto failed; } + if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & + ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) + gdprintk(XENLOG_WARNING, + "PTE flags %x at %"PRIx64" don't match grant (%x)\n", + l1e_get_flags(ol1e), addr, grant_pte_flags); + /* Delete pagetable entry. 
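
The strengthened checks in destroy_grant_pte_mapping() above and replace_grant_va_mapping() below are the core of XSA-234: an unmap request must now match the present/writeable flags the grant code originally installed, not merely the frame number. A stand-alone sketch of the match predicate (names are illustrative; the flag values mirror x86's _PAGE_PRESENT/_PAGE_RW):

    #include <stdbool.h>
    #include <stdint.h>

    #define _PAGE_PRESENT 0x001U
    #define _PAGE_RW      0x002U

    /* True iff the live PTE still maps the granted frame with the same
     * P/RW permissions the grant mapping was created with. */
    static bool grant_pte_matches(unsigned long pte_pfn, uint32_t pte_flags,
                                  unsigned long frame, uint32_t grant_flags)
    {
        if ( pte_pfn != frame )
            return false;

        return !((pte_flags ^ grant_flags) & (_PAGE_PRESENT | _PAGE_RW));
    }
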
*/ if ( unlikely(!UPDATE_ENTRY (l1, @@ -4110,7 +4337,8 @@ 0)) ) { page_unlock(page); - gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va); + gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n", + addr); rc = GNTST_general_error; goto failed; } @@ -4178,7 +4406,8 @@ } static int replace_grant_va_mapping( - unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v) + unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, + l1_pgentry_t nl1e, struct vcpu *v) { l1_pgentry_t *pl1e, ol1e; unsigned long gl1mfn; @@ -4214,20 +4443,33 @@ ol1e = *pl1e; - /* Check that the virtual address supplied is actually mapped to frame. */ - if ( unlikely(l1e_get_pfn(ol1e) != frame) ) - { - gdprintk(XENLOG_WARNING, - "PTE entry %lx for address %lx doesn't match frame %lx\n", - l1e_get_pfn(ol1e), addr, frame); + /* + * Check that the virtual address supplied is actually mapped to frame + * (with appropriate permissions). + */ + if ( unlikely(l1e_get_pfn(ol1e) != frame) || + unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & + (_PAGE_PRESENT | _PAGE_RW)) ) + { + gdprintk(XENLOG_ERR, + "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n", + l1e_get_intpte(ol1e), addr, + l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); rc = GNTST_general_error; goto unlock_and_out; } + if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & + ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) + gdprintk(XENLOG_WARNING, + "PTE flags %x for %"PRIx64" don't match grant (%x)\n", + l1e_get_flags(ol1e), addr, grant_pte_flags); + /* Delete pagetable entry. */ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) ) { - gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e); + gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n", + addr); rc = GNTST_general_error; goto unlock_and_out; } @@ -4241,9 +4483,11 @@ } static int destroy_grant_va_mapping( - unsigned long addr, unsigned long frame, struct vcpu *v) + unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, + struct vcpu *v) { - return replace_grant_va_mapping(addr, frame, l1e_empty(), v); + return replace_grant_va_mapping(addr, frame, grant_pte_flags, + l1e_empty(), v); } static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame, @@ -4338,20 +4582,39 @@ unsigned long gl1mfn; struct page_info *l1pg; int rc; + unsigned int grant_pte_flags; if ( paging_mode_external(current->domain) ) return replace_grant_p2m_mapping(addr, frame, new_addr, flags); + grant_pte_flags = + _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX; + + if ( flags & GNTMAP_application_map ) + grant_pte_flags |= _PAGE_USER; + if ( !(flags & GNTMAP_readonly) ) + grant_pte_flags |= _PAGE_RW; + /* + * On top of the explicit settings done by create_grant_host_mapping() + * also open-code relevant parts of adjust_guest_l1e(). Don't mirror + * available and cachability flags, though. + */ + if ( !is_pv_32bit_domain(curr->domain) ) + grant_pte_flags |= (grant_pte_flags & _PAGE_USER) + ? 
_PAGE_GLOBAL + : _PAGE_GUEST_KERNEL | _PAGE_USER; + if ( flags & GNTMAP_contains_pte ) { if ( !new_addr ) - return destroy_grant_pte_mapping(addr, frame, curr->domain); + return destroy_grant_pte_mapping(addr, frame, grant_pte_flags, + curr->domain); return GNTST_general_error; } if ( !new_addr ) - return destroy_grant_va_mapping(addr, frame, curr); + return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr); pl1e = guest_map_l1e(new_addr, &gl1mfn); if ( !pl1e ) @@ -4399,7 +4662,7 @@ put_page(l1pg); guest_unmap_l1e(pl1e); - rc = replace_grant_va_mapping(addr, frame, ol1e, curr); + rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr); if ( rc && !paging_mode_refcounts(curr->domain) ) put_page_from_l1e(ol1e, curr->domain); @@ -4860,6 +5123,9 @@ int rc = 0; p2m_type_t p2mt; + if ( !paging_mode_translate(d) ) + return -EACCES; + switch ( space ) { case XENMAPSPACE_shared_info: @@ -4916,7 +5182,7 @@ break; } - if ( !paging_mode_translate(d) || (mfn == 0) ) + if ( mfn == 0 ) { if ( page ) put_page(page); @@ -4945,8 +5211,12 @@ /* Unmap from old location, if any. */ old_gpfn = get_gpfn_from_mfn(mfn); ASSERT( old_gpfn != SHARED_M2P_ENTRY ); - if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range ) - ASSERT( old_gpfn == gfn ); + if ( (space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range) && + old_gpfn != gfn ) + { + rc = -EXDEV; + goto put_both; + } if ( old_gpfn != INVALID_M2P_ENTRY ) rc = guest_physmap_remove_page(d, _gfn(old_gpfn), _mfn(mfn), PAGE_ORDER_4K); @@ -6046,9 +6316,29 @@ { unsigned long base_mfn; - pl1e = l2e_to_l1e(*pl2e); if ( locking ) spin_lock(&map_pgdir_lock); + + ol2e = *pl2e; + /* + * L2E may be already cleared, or set to a superpage, by + * concurrent paging structure modifications on other CPUs. + */ + if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + continue; + } + + if ( l2e_get_flags(ol2e) & _PAGE_PSE ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + goto check_l3; + } + + pl1e = l2e_to_l1e(ol2e); base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ ) if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) || @@ -6056,7 +6346,6 @@ break; if ( i == L1_PAGETABLE_ENTRIES ) { - ol2e = *pl2e; l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn, l1f_to_lNf(flags))); if ( locking ) @@ -6082,7 +6371,20 @@ if ( locking ) spin_lock(&map_pgdir_lock); + ol3e = *pl3e; + /* + * L3E may be already cleared, or set to a superpage, by + * concurrent paging structure modifications on other CPUs. + */ + if ( !(l3e_get_flags(ol3e) & _PAGE_PRESENT) || + (l3e_get_flags(ol3e) & _PAGE_PSE) ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + continue; + } + pl2e = l3e_to_l2e(ol3e); base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES - 1); @@ -6149,7 +6451,7 @@ { l3_pgentry_t *pl3e = virt_to_xen_l3e(v); - if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) { /* Confirm the caller isn't trying to create new mappings. */ ASSERT(!(nf & _PAGE_PRESENT)); @@ -6177,6 +6479,8 @@ /* PAGE1GB: shatter the superpage and fall through. 
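The grant_pte_flags derivation above is what makes the later present/RW cross-checks meaningful. A sketch of the same derivation in isolation; the _PAGE_* values are the architectural x86 bits restated locally, and the GNTMAP_* values here are purely illustrative placeholders rather than the real grant-table ABI numbers:

#include <stdint.h>

#define _PAGE_PRESENT  0x001u
#define _PAGE_RW       0x002u
#define _PAGE_USER     0x004u
#define _PAGE_ACCESSED 0x020u
#define _PAGE_DIRTY    0x040u

#define GNTMAP_readonly        0x1u  /* illustrative, not the ABI value */
#define GNTMAP_application_map 0x2u  /* illustrative, not the ABI value */

/* Expected PTE permission bits for a grant mapping created with
 * 'gntmap' flags; mirrors replace_grant_host_mapping() above, minus
 * the Xen-private and 64bit-PV adjustments. */
static unsigned int expected_grant_pte_flags(unsigned int gntmap)
{
    unsigned int flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY;

    if ( gntmap & GNTMAP_application_map )
        flags |= _PAGE_USER;
    if ( !(gntmap & GNTMAP_readonly) )
        flags |= _PAGE_RW;

    return flags;
}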
*/ pl2e = alloc_xen_pagetable(); + if ( !pl2e ) + return -ENOMEM; for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) l2e_write(pl2e + i, l2e_from_pfn(l3e_get_pfn(*pl3e) + @@ -6197,7 +6501,11 @@ free_xen_pagetable(pl2e); } - pl2e = virt_to_xen_l2e(v); + /* + * The L3 entry has been verified to be present, and we've dealt with + * 1G pages as well, so the L2 table cannot require allocation. + */ + pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) { @@ -6226,6 +6534,8 @@ { /* PSE: shatter the superpage and try again. */ pl1e = alloc_xen_pagetable(); + if ( !pl1e ) + return -ENOMEM; for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) l1e_write(&pl1e[i], l1e_from_pfn(l2e_get_pfn(*pl2e) + i, @@ -6249,7 +6559,11 @@ { l1_pgentry_t nl1e; - /* Ordinary 4kB mapping. */ + /* + * Ordinary 4kB mapping: The L2 entry has been verified to be + * present, and we've dealt with 2M pages as well, so the L1 table + * cannot require allocation. + */ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v); /* Confirm the caller isn't trying to create new mappings. */ @@ -6269,6 +6583,27 @@ */ if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) ) continue; + if ( locking ) + spin_lock(&map_pgdir_lock); + + /* + * L2E may be already cleared, or set to a superpage, by + * concurrent paging structure modifications on other CPUs. + */ + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + goto check_l3; + } + + if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + continue; + } + pl1e = l2e_to_l1e(*pl2e); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) if ( l1e_get_intpte(pl1e[i]) != 0 ) @@ -6277,11 +6612,16 @@ { /* Empty: zap the L2E and free the L1 page. */ l2e_write_atomic(pl2e, l2e_empty()); + if ( locking ) + spin_unlock(&map_pgdir_lock); flush_area(NULL, FLUSH_TLB_GLOBAL); /* flush before free */ free_xen_pagetable(pl1e); } + else if ( locking ) + spin_unlock(&map_pgdir_lock); } + check_l3: /* * If we are not destroying mappings, or not done with the L3E, * skip the empty&free check. @@ -6289,6 +6629,21 @@ if ( (nf & _PAGE_PRESENT) || ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) ) continue; + if ( locking ) + spin_lock(&map_pgdir_lock); + + /* + * L3E may be already cleared, or set to a superpage, by + * concurrent paging structure modifications on other CPUs. + */ + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) || + (l3e_get_flags(*pl3e) & _PAGE_PSE) ) + { + if ( locking ) + spin_unlock(&map_pgdir_lock); + continue; + } + pl2e = l3e_to_l2e(*pl3e); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) if ( l2e_get_intpte(pl2e[i]) != 0 ) @@ -6297,9 +6652,13 @@ { /* Empty: zap the L3E and free the L2 page. 
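All of the map_pages_to_xen()/modify_xen_mappings() hunks above share one shape: examine an entry optimistically, take map_pgdir_lock, then re-read the entry before acting, because another CPU may have zapped it or promoted it to a superpage in between. A compile-only sketch of that shape, with a plain word standing in for the pagetable entry and a pthread mutex standing in for Xen's spinlock (all names hypothetical):

#include <pthread.h>
#include <stdbool.h>

#define ENTRY_PRESENT 0x001ul
#define ENTRY_PSE     0x080ul

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long entry;          /* stand-in for an L2E/L3E */

static bool zap_if_still_valid(void (*zap)(unsigned long))
{
    pthread_mutex_lock(&map_lock);

    /* Re-read under the lock: the unlocked check may be stale. */
    unsigned long e = entry;

    if ( !(e & ENTRY_PRESENT) || (e & ENTRY_PSE) )
    {
        pthread_mutex_unlock(&map_lock);
        return false;                /* someone else got here first */
    }

    zap(e);
    pthread_mutex_unlock(&map_lock);
    return true;
}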
*/ l3e_write_atomic(pl3e, l3e_empty()); + if ( locking ) + spin_unlock(&map_pgdir_lock); flush_area(NULL, FLUSH_TLB_GLOBAL); /* flush before free */ free_xen_pagetable(pl2e); } + else if ( locking ) + spin_unlock(&map_pgdir_lock); } flush_area(NULL, FLUSH_TLB_GLOBAL); @@ -6615,6 +6974,14 @@ memguard_unguard_range(p, PAGE_SIZE); } +bool memguard_is_stack_guard_page(unsigned long addr) +{ + addr &= STACK_SIZE - 1; + + return addr >= STACK_SIZE - PRIMARY_STACK_SIZE - PAGE_SIZE && + addr < STACK_SIZE - PRIMARY_STACK_SIZE; +} + void arch_dump_shared_mem_info(void) { printk("Shared frames %u -- Saved frames %u\n", diff -Nru xen-4.9.0/xen/arch/x86/msi.c xen-4.9.2/xen/arch/x86/msi.c --- xen-4.9.0/xen/arch/x86/msi.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/msi.c 2018-03-28 13:10:55.000000000 +0000 @@ -1050,11 +1050,10 @@ old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI); if ( old_desc ) { - printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", + printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", msi->irq, msi->seg, msi->bus, PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); - *desc = old_desc; - return 0; + return -EEXIST; } old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); @@ -1118,11 +1117,10 @@ old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX); if ( old_desc ) { - printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", + printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", msi->irq, msi->seg, msi->bus, PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); - *desc = old_desc; - return 0; + return -EEXIST; } old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); diff -Nru xen-4.9.0/xen/arch/x86/physdev.c xen-4.9.2/xen/arch/x86/physdev.c --- xen-4.9.0/xen/arch/x86/physdev.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/physdev.c 2018-03-28 13:10:55.000000000 +0000 @@ -111,7 +111,7 @@ if ( d == NULL ) return -ESRCH; - ret = xsm_map_domain_pirq(XSM_TARGET, d); + ret = xsm_map_domain_pirq(XSM_DM_PRIV, d); if ( ret ) goto free_domain; @@ -186,7 +186,7 @@ } else if ( type == MAP_PIRQ_TYPE_MULTI_MSI ) { - if ( msi->entry_nr <= 0 || msi->entry_nr > 32 ) + if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS ) ret = -EDOM; else if ( msi->entry_nr != 1 && !iommu_intremap ) ret = -EOPNOTSUPP; @@ -256,13 +256,14 @@ int physdev_unmap_pirq(domid_t domid, int pirq) { struct domain *d; - int ret; + int ret = 0; d = rcu_lock_domain_by_any_id(domid); if ( d == NULL ) return -ESRCH; - ret = xsm_unmap_domain_pirq(XSM_TARGET, d); + if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) ) + ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d); if ( ret ) goto free_domain; diff -Nru xen-4.9.0/xen/arch/x86/pv/dom0_build.c xen-4.9.2/xen/arch/x86/pv/dom0_build.c --- xen-4.9.0/xen/arch/x86/pv/dom0_build.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/pv/dom0_build.c 2018-03-28 13:10:55.000000000 +0000 @@ -870,6 +870,13 @@ regs->rsi = vstartinfo_start; regs->eflags = X86_EFLAGS_IF; + /* + * We don't call arch_set_info_guest(), so some initialisation needs doing + * by hand: + * - Reset the GDT to reference zero_page + */ + destroy_gdt(v); + if ( test_bit(XENFEAT_supervisor_mode_kernel, parms.f_required) ) panic("Dom0 requires supervisor-mode execution"); diff -Nru xen-4.9.0/xen/arch/x86/Rules.mk xen-4.9.2/xen/arch/x86/Rules.mk --- xen-4.9.0/xen/arch/x86/Rules.mk 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/Rules.mk 2018-03-28 13:10:55.000000000 +0000 @@ 
-30,3 +30,16 @@ ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n) CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE endif + +# Compile with thunk-extern, indirect-branch-register if available. +ifneq ($(call cc-option,$(CC),-mindirect-branch-register,n),n) +CFLAGS += -mindirect-branch=thunk-extern -mindirect-branch-register +CFLAGS += -DCONFIG_INDIRECT_THUNK +export CONFIG_INDIRECT_THUNK=y +endif + +# Set up the assembler include path properly for older GCC toolchains. Clang +# objects to the argument being passed, however. +ifneq ($(clang),y) +CFLAGS += -Wa,-I$(BASEDIR)/include +endif diff -Nru xen-4.9.0/xen/arch/x86/setup.c xen-4.9.2/xen/arch/x86/setup.c --- xen-4.9.0/xen/arch/x86/setup.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/setup.c 2018-03-28 13:10:55.000000000 +0000 @@ -51,6 +51,7 @@ #include #include #include +#include /* opt_nosmp: If true, secondary processors are ignored. */ static bool_t __initdata opt_nosmp; @@ -638,7 +639,7 @@ module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; int i, j, e820_warn = 0, bytes = 0; - bool_t acpi_boot_table_init_done = 0; + bool acpi_boot_table_init_done = false, relocated = false; struct domain *dom0; struct ns16550_defaults ns16550 = { .data_bits = 8, @@ -652,6 +653,7 @@ set_processor_id(0); set_current(INVALID_VCPU); /* debug sanity. */ idle_vcpu[0] = current; + init_shadow_spec_ctrl_state(); percpu_init_areas(); @@ -889,8 +891,10 @@ mod[i].reserved = 0; } - if ( efi_enabled(EFI_LOADER) ) + if ( xen_phys_start ) { + relocated = true; + /* * This needs to remain in sync with xen_in_range() and the * respective reserve_e820_ram() invocation below. @@ -1083,8 +1087,7 @@ /* Don't overlap with other modules (or Xen itself). */ end = consider_modules(s, e, size, mod, - mbi->mods_count + efi_enabled(EFI_LOADER), - j); + mbi->mods_count + relocated, j); if ( highmem_start && end > highmem_start ) continue; @@ -1111,7 +1114,7 @@ { /* Don't overlap with modules (or Xen itself.
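The Rules.mk probe above only defines CONFIG_INDIRECT_THUNK when the toolchain accepts -mindirect-branch=thunk-extern, so C code has to behave sensibly either way. A hedged sketch of how such a conditional is typically consumed; with the option in effect the compiler itself rewrites indirect branches through __x86_indirect_thunk_<reg>, so plain C only observes the #define:

#include <stdio.h>

#ifdef CONFIG_INDIRECT_THUNK
# define THUNK_STATE "indirect branches routed through retpoline thunks"
#else
# define THUNK_STATE "plain indirect branches"
#endif

int main(void)
{
    int (*fn)(const char *) = puts;  /* an ordinary indirect call */

    return fn(THUNK_STATE) >= 0 ? 0 : 1;
}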
*/ e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), mod, - mbi->mods_count + efi_enabled(EFI_LOADER), -1); + mbi->mods_count + relocated, -1); if ( s >= e ) break; if ( e > kexec_crash_area_limit ) @@ -1470,14 +1473,14 @@ if ( !opt_smep ) setup_clear_cpu_cap(X86_FEATURE_SMEP); if ( cpu_has_smep && opt_smep != SMEP_HVM_ONLY ) - __set_bit(X86_FEATURE_XEN_SMEP, boot_cpu_data.x86_capability); + setup_force_cpu_cap(X86_FEATURE_XEN_SMEP); if ( boot_cpu_has(X86_FEATURE_XEN_SMEP) ) set_in_cr4(X86_CR4_SMEP); if ( !opt_smap ) setup_clear_cpu_cap(X86_FEATURE_SMAP); if ( cpu_has_smap && opt_smap != SMAP_HVM_ONLY ) - __set_bit(X86_FEATURE_XEN_SMAP, boot_cpu_data.x86_capability); + setup_force_cpu_cap(X86_FEATURE_XEN_SMAP); if ( boot_cpu_has(X86_FEATURE_XEN_SMAP) ) set_in_cr4(X86_CR4_SMAP); @@ -1486,6 +1489,8 @@ if ( cpu_has_fsgsbase ) set_in_cr4(X86_CR4_FSGSBASE); + init_speculation_mitigations(); + init_idle_domain(); this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(), diff -Nru xen-4.9.0/xen/arch/x86/smpboot.c xen-4.9.2/xen/arch/x86/smpboot.c --- xen-4.9.0/xen/arch/x86/smpboot.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/smpboot.c 2018-03-28 13:10:55.000000000 +0000 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -302,6 +303,7 @@ set_current(idle_vcpu[cpu]); this_cpu(curr_vcpu) = idle_vcpu[cpu]; rdmsrl(MSR_EFER, this_cpu(efer)); + init_shadow_spec_ctrl_state(); /* * Just as during early bootstrap, it is convenient here to disable @@ -321,6 +323,9 @@ */ spin_debug_disable(); + get_cpu_info()->xen_cr3 = 0; + get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0; + load_system_tables(); /* Full exception support from here on in. */ @@ -635,6 +640,238 @@ set_cpu_state(CPU_STATE_DEAD); } +static int clone_mapping(const void *ptr, root_pgentry_t *rpt) +{ + unsigned long linear = (unsigned long)ptr, pfn; + unsigned int flags; + l3_pgentry_t *pl3e; + l2_pgentry_t *pl2e; + l1_pgentry_t *pl1e; + + /* + * Sanity check 'linear'. We only allow cloning from the Xen virtual + * range, and in particular, only from the directmap and .text ranges. 
+ */ + if ( root_table_offset(linear) > ROOT_PAGETABLE_LAST_XEN_SLOT || + root_table_offset(linear) < ROOT_PAGETABLE_FIRST_XEN_SLOT ) + return -EINVAL; + + if ( linear < XEN_VIRT_START || + (linear >= XEN_VIRT_END && linear < DIRECTMAP_VIRT_START) ) + return -EINVAL; + + pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) + + l3_table_offset(linear); + + flags = l3e_get_flags(*pl3e); + ASSERT(flags & _PAGE_PRESENT); + if ( flags & _PAGE_PSE ) + { + pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) | + (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1)); + flags &= ~_PAGE_PSE; + } + else + { + pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear); + flags = l2e_get_flags(*pl2e); + ASSERT(flags & _PAGE_PRESENT); + if ( flags & _PAGE_PSE ) + { + pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) | + (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1)); + flags &= ~_PAGE_PSE; + } + else + { + pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear); + flags = l1e_get_flags(*pl1e); + if ( !(flags & _PAGE_PRESENT) ) + return 0; + pfn = l1e_get_pfn(*pl1e); + } + } + + if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) ) + { + pl3e = alloc_xen_pagetable(); + if ( !pl3e ) + return -ENOMEM; + clear_page(pl3e); + l4e_write(&rpt[root_table_offset(linear)], + l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); + } + else + pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]); + + pl3e += l3_table_offset(linear); + + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + { + pl2e = alloc_xen_pagetable(); + if ( !pl2e ) + return -ENOMEM; + clear_page(pl2e); + l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); + } + else + { + ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE)); + pl2e = l3e_to_l2e(*pl3e); + } + + pl2e += l2_table_offset(linear); + + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) + { + pl1e = alloc_xen_pagetable(); + if ( !pl1e ) + return -ENOMEM; + clear_page(pl1e); + l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR)); + } + else + { + ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE)); + pl1e = l2e_to_l1e(*pl2e); + } + + pl1e += l1_table_offset(linear); + flags &= ~_PAGE_GLOBAL; + + if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT ) + { + ASSERT(l1e_get_pfn(*pl1e) == pfn); + ASSERT(l1e_get_flags(*pl1e) == flags); + } + else + l1e_write(pl1e, l1e_from_pfn(pfn, flags)); + + return 0; +} + +static __read_mostly int8_t opt_xpti = -1; +boolean_param("xpti", opt_xpti); +DEFINE_PER_CPU(root_pgentry_t *, root_pgt); + +static root_pgentry_t common_pgt; + +extern const char _stextentry[], _etextentry[]; + +static int setup_cpu_root_pgt(unsigned int cpu) +{ + root_pgentry_t *rpt; + unsigned int off; + int rc; + + if ( !opt_xpti ) + return 0; + + rpt = alloc_xen_pagetable(); + if ( !rpt ) + return -ENOMEM; + + clear_page(rpt); + per_cpu(root_pgt, cpu) = rpt; + + rpt[root_table_offset(RO_MPT_VIRT_START)] = + idle_pg_table[root_table_offset(RO_MPT_VIRT_START)]; + /* SH_LINEAR_PT inserted together with guest mappings. */ + /* PERDOMAIN inserted during context switch. */ + + /* One-time setup of common_pgt, which maps .text.entry and the stubs. */ + if ( unlikely(!root_get_intpte(common_pgt)) ) + { + const char *ptr; + + for ( rc = 0, ptr = _stextentry; + !rc && ptr < _etextentry; ptr += PAGE_SIZE ) + rc = clone_mapping(ptr, rpt); + + if ( rc ) + return rc; + + common_pgt = rpt[root_table_offset(XEN_VIRT_START)]; + } + + rpt[root_table_offset(XEN_VIRT_START)] = common_pgt; + + /* Install direct map page table entries for stack, IDT, and TSS. 
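Inside clone_mapping() above, the pfn for a linear address covered by a 2M or 1G entry is recovered by masking: the superpage supplies the high bits, the linear address the low ones. The arithmetic on its own, using the x86-64 constants (PAGE_SHIFT 12, PAGETABLE_ORDER 9) restated locally:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGETABLE_ORDER 9
#define PFN_DOWN(va)    ((va) >> PAGE_SHIFT)

/* 4k frame backing 'linear' inside a 2M superpage based at 'l2e_pfn';
 * doubling the order gives the equivalent 1G computation. */
static uint64_t pfn_within_2m(uint64_t l2e_pfn, uint64_t linear)
{
    uint64_t mask = (1ULL << PAGETABLE_ORDER) - 1;

    return (l2e_pfn & ~mask) | (PFN_DOWN(linear) & mask);
}

int main(void)
{
    /* 2M page at pfn 0x200; byte 0x4567 into it lives in frame 0x204. */
    printf("%#llx\n", (unsigned long long)pfn_within_2m(0x200, 0x204567));
    return 0;
}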
*/ + for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE ) + if ( !memguard_is_stack_guard_page(off) ) + rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt); + + if ( !rc ) + rc = clone_mapping(idt_tables[cpu], rpt); + if ( !rc ) + rc = clone_mapping(&per_cpu(init_tss, cpu), rpt); + if ( !rc ) + rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt); + + return rc; +} + +static void cleanup_cpu_root_pgt(unsigned int cpu) +{ + root_pgentry_t *rpt = per_cpu(root_pgt, cpu); + unsigned int r; + unsigned long stub_linear = per_cpu(stubs.addr, cpu); + + if ( !rpt ) + return; + + per_cpu(root_pgt, cpu) = NULL; + + for ( r = root_table_offset(DIRECTMAP_VIRT_START); + r < root_table_offset(HYPERVISOR_VIRT_END); ++r ) + { + l3_pgentry_t *l3t; + unsigned int i3; + + if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) ) + continue; + + l3t = l4e_to_l3e(rpt[r]); + + for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 ) + { + l2_pgentry_t *l2t; + unsigned int i2; + + if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) ) + continue; + + ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE)); + l2t = l3e_to_l2e(l3t[i3]); + + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 ) + { + if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) ) + continue; + + ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE)); + free_xen_pagetable(l2e_to_l1e(l2t[i2])); + } + + free_xen_pagetable(l2t); + } + + free_xen_pagetable(l3t); + } + + free_xen_pagetable(rpt); + + /* Also zap the stub mapping for this CPU. */ + if ( stub_linear ) + { + l3_pgentry_t *l3t = l4e_to_l3e(common_pgt); + l2_pgentry_t *l2t = l3e_to_l2e(l3t[l3_table_offset(stub_linear)]); + l1_pgentry_t *l1t = l2e_to_l1e(l2t[l2_table_offset(stub_linear)]); + + l1t[l2_table_offset(stub_linear)] = l1e_empty(); + } +} + static void cpu_smpboot_free(unsigned int cpu) { unsigned int order, socket = cpu_to_socket(cpu); @@ -656,6 +893,8 @@ if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask ) free_cpumask_var(per_cpu(scratch_cpumask, cpu)); + cleanup_cpu_root_pgt(cpu); + if ( per_cpu(stubs.addr, cpu) ) { unsigned long mfn = per_cpu(stubs.mfn, cpu); @@ -724,6 +963,9 @@ if ( idt_tables[cpu] == NULL ) goto oom; memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) @@ -738,6 +980,9 @@ goto oom; per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu); + if ( setup_cpu_root_pgt(cpu) ) + goto oom; + if ( secondary_socket_cpumask == NULL && (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL ) goto oom; @@ -780,6 +1025,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { + int rc; + register_cpu_notifier(&cpu_smpboot_nfb); mtrr_aps_sync_begin(); @@ -793,6 +1040,24 @@ stack_base[0] = stack_start; + if ( opt_xpti < 0 ) + opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD; + + rc = setup_cpu_root_pgt(0); + if ( rc ) + panic("Error %d setting up PV root page table\n", rc); + if ( per_cpu(root_pgt, 0) ) + { + get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); + + /* + * All entry points which may need to switch page tables have to start + * with interrupts off. Re-write what pv_trap_init() has put there. 
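smp_prepare_cpus() above resolves the tri-state xpti= option: auto (-1) becomes enabled on everything except AMD parts, which were judged not to need the Meltdown page-table isolation. The decision in isolation, with an illustrative vendor constant:

#include <stdbool.h>
#include <stdint.h>

#define X86_VENDOR_AMD 2   /* illustrative numbering */

/* opt_xpti: -1 = auto, 0 = off, 1 = on; mirrors the hunk above. */
static bool xpti_enabled(int8_t opt_xpti, unsigned int vendor)
{
    if ( opt_xpti < 0 )
        return vendor != X86_VENDOR_AMD;

    return opt_xpti;
}

Booting with xpti=false therefore skips setup_cpu_root_pgt() entirely, leaving pv_cr3 zero and the entry-path CR3 switches inert.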
+ */ + _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, + &int80_direct_trap); + } + set_nr_sockets(); socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); @@ -862,6 +1127,9 @@ #if NR_CPUS > 2 * BITS_PER_LONG per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask; #endif + + get_cpu_info()->xen_cr3 = 0; + get_cpu_info()->pv_cr3 = 0; } static void @@ -1027,7 +1295,10 @@ void __init smp_cpus_done(void) { if ( nmi_watchdog == NMI_LOCAL_APIC ) + { + setup_apic_nmi_watchdog(); check_nmi_watchdog(); + } setup_ioapic_dest(); diff -Nru xen-4.9.0/xen/arch/x86/spec_ctrl.c xen-4.9.2/xen/arch/x86/spec_ctrl.c --- xen-4.9.0/xen/arch/x86/spec_ctrl.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/spec_ctrl.c 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,310 @@ +/****************************************************************************** + * arch/x86/spec_ctrl.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see . + * + * Copyright (c) 2017-2018 Citrix Systems Ltd. + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +static enum ind_thunk { + THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ + THUNK_NONE, /* Missing compiler support for thunks. */ + + THUNK_RETPOLINE, + THUNK_LFENCE, + THUNK_JMP, +} opt_thunk __initdata = THUNK_DEFAULT; +static int8_t __initdata opt_ibrs = -1; +static bool __initdata opt_rsb_native = true; +static bool __initdata opt_rsb_vmexit = true; +bool __read_mostly opt_ibpb = true; +uint8_t __read_mostly default_bti_ist_info; + +static int __init parse_bti(const char *s) +{ + const char *ss; + int val, rc = 0; + + do { + ss = strchr(s, ','); + if ( !ss ) + ss = strchr(s, '\0'); + + if ( !strncmp(s, "thunk=", 6) ) + { + s += 6; + + if ( !strncmp(s, "retpoline", ss - s) ) + opt_thunk = THUNK_RETPOLINE; + else if ( !strncmp(s, "lfence", ss - s) ) + opt_thunk = THUNK_LFENCE; + else if ( !strncmp(s, "jmp", ss - s) ) + opt_thunk = THUNK_JMP; + else + rc = -EINVAL; + } + else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) + opt_ibrs = val; + else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + opt_ibpb = val; + else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) + opt_rsb_native = val; + else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) + opt_rsb_vmexit = val; + else + rc = -EINVAL; + + s = ss + 1; + } while ( *ss ); + + return rc; +} +custom_param("bti", parse_bti); + +static void __init print_details(enum ind_thunk thunk) +{ + unsigned int _7d0 = 0, e8b = 0, tmp; + + /* Collect diagnostics about available mitigations. */ + if ( boot_cpu_data.cpuid_level >= 7 ) + cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0); + if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) + cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); + + printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. 
*/ + if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) | + cpufeat_mask(X86_FEATURE_STIBP))) || + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ) + printk(XENLOG_DEBUG " Hardware features:%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : ""); + + /* Compiled-in support which pertains to BTI mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); + + printk(XENLOG_INFO + "BTI mitigations: Thunk %s, Others:%s%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", + boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : + boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", + opt_ibpb ? " IBPB" : "", + boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", + boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); +} + +/* Calculate whether Retpoline is known-safe on this CPU. */ +static bool __init retpoline_safe(void) +{ + unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev; + + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) + return true; + + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6 ) + return false; + + switch ( boot_cpu_data.x86_model ) + { + case 0x17: /* Penryn */ + case 0x1d: /* Dunnington */ + case 0x1e: /* Nehalem */ + case 0x1f: /* Auburndale / Havendale */ + case 0x1a: /* Nehalem EP */ + case 0x2e: /* Nehalem EX */ + case 0x25: /* Westmere */ + case 0x2c: /* Westmere EP */ + case 0x2f: /* Westmere EX */ + case 0x2a: /* SandyBridge */ + case 0x2d: /* SandyBridge EP/EX */ + case 0x3a: /* IvyBridge */ + case 0x3e: /* IvyBridge EP/EX */ + case 0x3c: /* Haswell */ + case 0x3f: /* Haswell EX/EP */ + case 0x45: /* Haswell D */ + case 0x46: /* Haswell H */ + return true; + + /* + * Broadwell processors are retpoline-safe after specific microcode + * versions. + */ + case 0x3d: /* Broadwell */ + return ucode_rev >= 0x28; + case 0x47: /* Broadwell H */ + return ucode_rev >= 0x1b; + case 0x4f: /* Broadwell EP/EX */ + return ucode_rev >= 0xb000025; + case 0x56: /* Broadwell D */ + return false; /* TBD. */ + + /* + * Skylake and later processors are not retpoline-safe. + */ + default: + return false; + } +} + +void __init init_speculation_mitigations(void) +{ + enum ind_thunk thunk = THUNK_DEFAULT; + bool ibrs = false; + + /* + * Has the user specified any custom BTI mitigations? If so, follow their + * instructions exactly and disable all heuristics. + */ + if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 ) + { + thunk = opt_thunk; + ibrs = !!opt_ibrs; + } + else + { + /* + * Evaluate the safest Branch Target Injection mitigations to use. + * First, begin with compiler-aided mitigations. + */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + { + /* + * AMD's recommended mitigation is to set lfence as being dispatch + * serialising, and to use IND_THUNK_LFENCE. + */ + if ( cpu_has_lfence_dispatch ) + thunk = THUNK_LFENCE; + /* + * On Intel hardware, we'd like to use retpoline in preference to + * IBRS, but only if it is safe on this hardware. + */ + else if ( retpoline_safe() ) + thunk = THUNK_RETPOLINE; + else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + ibrs = true; + } + /* Without compiler thunk support, use IBRS if available. */ + else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + ibrs = true; + } + + /* + * Supplimentary minor adjustments. 
Without compiler support, there are + * no thunks. + */ + if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + thunk = THUNK_NONE; + + /* + * If IBRS is in use and thunks are compiled in, there is no point + * suffering extra overhead. Switch to the least-overhead thunk. + */ + if ( ibrs && thunk == THUNK_DEFAULT ) + thunk = THUNK_JMP; + + /* + * If there are still no thunk preferences, the compiled default is + * actually retpoline, and it is better than nothing. + */ + if ( thunk == THUNK_DEFAULT ) + thunk = THUNK_RETPOLINE; + + /* Apply the chosen settings. */ + if ( thunk == THUNK_LFENCE ) + setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE); + else if ( thunk == THUNK_JMP ) + setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); + + if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + { + /* + * Even if we've chosen to not have IBRS set in Xen context, we still + * need the IBRS entry/exit logic to virtualise IBRS support for + * guests. + */ + if ( ibrs ) + setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET); + else + setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); + + default_bti_ist_info |= BTI_IST_WRMSR | ibrs; + } + + /* + * PV guests can poison the RSB to any virtual address from which + * they can execute a call instruction. This is necessarily outside + * of the Xen supervisor mappings. + * + * With SMEP enabled, the processor won't speculate into user mappings. + * Therefore, in this case, we don't need to worry about poisoned entries + * from 64bit PV guests. + * + * 32bit PV guest kernels run in ring 1, so use supervisor mappings. + * If a processors speculates to 32bit PV guest kernel mappings, it is + * speculating in 64bit supervisor mode, and can leak data. + */ + if ( opt_rsb_native ) + { + setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE); + default_bti_ist_info |= BTI_IST_RSB; + } + + /* + * HVM guests can always poison the RSB to point at Xen supervisor + * mappings. + */ + if ( opt_rsb_vmexit ) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + + /* Check we have hardware IBPB support before using it... */ + if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) + opt_ibpb = false; + + /* (Re)init BSP state now that default_bti_ist_info has been calculated. */ + init_shadow_spec_ctrl_state(); + + print_details(thunk); +} + +static void __init __maybe_unused build_assertions(void) +{ + /* The optimised assembly relies on this alias. 
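Taken together, the heuristics above reduce to a small decision function. A condensed restatement (the explicit bti= overrides and the XEN_IBRS_* capability setup are omitted; the booleans summarise cpu_has_lfence_dispatch, retpoline_safe() and the IBRS choice):

#include <stdbool.h>

enum ind_thunk { THUNK_DEFAULT, THUNK_NONE, THUNK_RETPOLINE,
                 THUNK_LFENCE, THUNK_JMP };

static enum ind_thunk choose_thunk(bool thunks_compiled_in, bool lfence_ok,
                                   bool retpoline_ok, bool ibrs)
{
    enum ind_thunk thunk = THUNK_DEFAULT;

    if ( !thunks_compiled_in )
        return THUNK_NONE;

    if ( lfence_ok )
        thunk = THUNK_LFENCE;
    else if ( retpoline_ok )
        thunk = THUNK_RETPOLINE;

    if ( ibrs && thunk == THUNK_DEFAULT )
        thunk = THUNK_JMP;     /* IBRS already covers us: cheapest thunk */

    return thunk == THUNK_DEFAULT ? THUNK_RETPOLINE : thunk;
}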
*/ + BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/xen/arch/x86/srat.c xen-4.9.2/xen/arch/x86/srat.c --- xen-4.9.0/xen/arch/x86/srat.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/srat.c 2018-03-28 13:10:55.000000000 +0000 @@ -110,8 +110,8 @@ for (i = 0; i < num_node_memblks; i++) { struct node *nd = &node_memblk_range[i]; - if (nd->start <= start && nd->end > end && - memblk_nodeid[i] == node ) + if (nd->start <= start && nd->end >= end && + memblk_nodeid[i] == node) return 1; } @@ -372,7 +372,7 @@ } start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size - 1; + end = e820.map[i].addr + e820.map[i].size; do { found = 0; diff -Nru xen-4.9.0/xen/arch/x86/time.c xen-4.9.2/xen/arch/x86/time.c --- xen-4.9.0/xen/arch/x86/time.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/time.c 2018-03-28 13:10:55.000000000 +0000 @@ -67,6 +67,7 @@ }; static DEFINE_PER_CPU(struct cpu_time, cpu_time); +DEFINE_PER_CPU(uint32_t, tsc_aux); /* Calibrate all CPUs to platform timer every EPOCH. */ #define EPOCH MILLISECS(1000) diff -Nru xen-4.9.0/xen/arch/x86/traps.c xen-4.9.2/xen/arch/x86/traps.c --- xen-4.9.0/xen/arch/x86/traps.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/traps.c 2018-03-28 13:10:55.000000000 +0000 @@ -100,7 +100,8 @@ DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table); /* Master table, used by CPU0. */ -idt_entry_t idt_table[IDT_ENTRIES]; +idt_entry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE) + idt_table[IDT_ENTRIES]; /* Pointer to the IDT of every CPU. */ idt_entry_t *idt_tables[NR_CPUS] __read_mostly; @@ -538,7 +539,7 @@ show_execution_state(regs); else printk(XENLOG_ERR "CPU%d @ %04x:%08lx (%pS)\n", cpu, regs->cs, regs->rip, - guest_mode(regs) ? _p(regs->rip) : NULL); + guest_mode(regs) ? NULL : _p(regs->rip)); cpumask_clear_cpu(cpu, &show_state_mask); return 1; @@ -1486,12 +1487,8 @@ */ if ( paging_mode_enabled(d) && !paging_mode_external(d) ) { - int ret; + int ret = paging_fault(addr, regs); - /* Logdirty mode is the only expected paging mode for PV guests. */ - ASSERT(paging_mode_only_log_dirty(d)); - - ret = paging_fault(addr, regs); if ( ret == EXCRET_fault_fixed ) trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->rip, addr); return ret; @@ -1921,12 +1918,12 @@ { /* If in user mode, switch to kernel mode just to read I/O bitmap. 
*/ int user_mode = !(v->arch.flags & TF_kernel_mode); -#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_pt(v) if ( iopl_ok(v, regs) ) return 1; - if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) ) + if ( (port + bytes) <= v->arch.pv_vcpu.iobmp_limit ) { union { uint8_t bytes[2]; uint16_t mask; } x; @@ -2151,37 +2148,54 @@ typedef void io_emul_stub_t(struct cpu_user_regs *); +void __x86_indirect_thunk_rcx(void); + static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, unsigned int port, unsigned int bytes) { + struct stubs *this_stubs = &this_cpu(stubs); + unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2; + if ( !ctxt->io_emul_stub ) - ctxt->io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) + - (this_cpu(stubs.addr) & - ~PAGE_MASK) + - STUB_BUF_SIZE / 2; + ctxt->io_emul_stub = + map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK); /* movq $host_to_guest_gpr_switch,%rcx */ ctxt->io_emul_stub[0] = 0x48; ctxt->io_emul_stub[1] = 0xb9; *(void **)&ctxt->io_emul_stub[2] = (void *)host_to_guest_gpr_switch; + +#ifdef CONFIG_INDIRECT_THUNK + /* callq __x86_indirect_thunk_rcx */ + ctxt->io_emul_stub[10] = 0xe8; + *(int32_t *)&ctxt->io_emul_stub[11] = + (long)__x86_indirect_thunk_rcx - (stub_va + 11 + 4); +#else /* callq *%rcx */ ctxt->io_emul_stub[10] = 0xff; ctxt->io_emul_stub[11] = 0xd1; + /* TODO: untangle ideal_nops from init/livepatch Kconfig options. */ + memcpy(&ctxt->io_emul_stub[12], "\x0f\x1f\x00", 3); /* P6_NOP3 */ +#endif + /* data16 or nop */ - ctxt->io_emul_stub[12] = (bytes != 2) ? 0x90 : 0x66; + ctxt->io_emul_stub[15] = (bytes != 2) ? 0x90 : 0x66; /* */ - ctxt->io_emul_stub[13] = opcode; + ctxt->io_emul_stub[16] = opcode; /* imm8 or nop */ - ctxt->io_emul_stub[14] = !(opcode & 8) ? port : 0x90; + ctxt->io_emul_stub[17] = !(opcode & 8) ? port : 0x90; /* ret (jumps to guest_to_host_gpr_switch) */ - ctxt->io_emul_stub[15] = 0xc3; - BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 16); + ctxt->io_emul_stub[18] = 0xc3; + BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 19); if ( ioemul_handle_quirk ) - ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[12], ctxt->ctxt.regs); + { + BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 15 + 10); + ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[15], ctxt->ctxt.regs); + } /* Handy function-typed pointer to the stub. */ - return (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2); + return (void *)stub_va; } static int priv_op_read_io(unsigned int port, unsigned int bytes, @@ -2204,7 +2218,6 @@ io_emul_stub_t *io_emul = io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - mark_regs_dirty(ctxt->regs); io_emul(ctxt->regs); return X86EMUL_DONE; } @@ -2234,7 +2247,6 @@ io_emul_stub_t *io_emul = io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - mark_regs_dirty(ctxt->regs); io_emul(ctxt->regs); if ( (bytes == 1) && pv_post_outb_hook ) pv_post_outb_hook(port, val); @@ -2625,18 +2637,30 @@ *val = 0; return X86EMUL_OKAY; + case MSR_PRED_CMD: + /* Write-only */ + break; + + case MSR_SPEC_CTRL: + if ( !currd->arch.cpuid->feat.ibrsb ) + break; + *val = curr->arch.spec_ctrl; + return X86EMUL_OKAY; + case MSR_INTEL_PLATFORM_INFO: - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - rdmsr_safe(MSR_INTEL_PLATFORM_INFO, *val) ) + if ( !boot_cpu_has(X86_FEATURE_MSR_PLATFORM_INFO) ) break; *val = 0; if ( this_cpu(cpuid_faulting_enabled) ) *val |= MSR_PLATFORM_INFO_CPUID_FAULTING; return X86EMUL_OKAY; + case MSR_ARCH_CAPABILITIES: + /* Not implemented yet. 
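In io_emul_stub_setup() above, the retpoline variant emits a direct near call to __x86_indirect_thunk_rcx. Its rel32 is measured from the end of the 5-byte call instruction, which is why the displacement written at stub offset 11 subtracts stub_va + 11 + 4. The displacement arithmetic by itself:

#include <stdint.h>

/* rel32 for an e8 near call whose opcode byte sits at stub_va + opc_off;
 * x86 measures the displacement from the *next* instruction. With
 * opc_off == 10 this reproduces the stub_va + 11 + 4 above. */
static int32_t call_rel32(uint64_t target, uint64_t stub_va,
                          unsigned int opc_off)
{
    return (int32_t)(target - (stub_va + opc_off + 1 + 4));
}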
*/ + break; + case MSR_INTEL_MISC_FEATURES_ENABLES: - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, *val) ) + if ( !boot_cpu_has(X86_FEATURE_MSR_MISC_FEATURES) ) break; *val = 0; if ( curr->arch.cpuid_faulting ) @@ -2834,15 +2858,40 @@ return X86EMUL_OKAY; case MSR_INTEL_PLATFORM_INFO: - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - val || rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) ) - break; + case MSR_ARCH_CAPABILITIES: + /* The MSR is read-only. */ + break; + + case MSR_SPEC_CTRL: + if ( !currd->arch.cpuid->feat.ibrsb ) + break; /* MSR available? */ + + /* + * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored) + * when STIBP isn't enumerated in hardware. + */ + + if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) + break; /* Rsvd bit set? */ + + curr->arch.spec_ctrl = val; + return X86EMUL_OKAY; + + case MSR_PRED_CMD: + if ( !currd->arch.cpuid->feat.ibrsb && !currd->arch.cpuid->extd.ibpb ) + break; /* MSR available? */ + + /* + * The only defined behaviour is when writing PRED_CMD_IBPB. In + * practice, real hardware accepts any value without faulting. + */ + if ( val & PRED_CMD_IBPB ) + wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB); return X86EMUL_OKAY; case MSR_INTEL_MISC_FEATURES_ENABLES: - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - (val & ~MSR_MISC_FEATURES_CPUID_FAULTING) || - rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, temp) ) + if ( !boot_cpu_has(X86_FEATURE_MSR_MISC_FEATURES) || + (val & ~MSR_MISC_FEATURES_CPUID_FAULTING) ) break; if ( (val & MSR_MISC_FEATURES_CPUID_FAULTING) && !this_cpu(cpuid_faulting_enabled) ) @@ -3628,7 +3677,7 @@ void do_nmi(const struct cpu_user_regs *regs) { unsigned int cpu = smp_processor_id(); - unsigned char reason; + unsigned char reason = 0; bool_t handle_unknown = 0; ++nmi_count(cpu); @@ -3636,6 +3685,16 @@ if ( nmi_callback(regs, cpu) ) return; + /* + * Accessing port 0x61 may trap to SMM which has been actually + * observed on some production SKX servers. This SMI sometimes + * takes enough time for the next NMI tick to happen. By reading + * this port before we re-arm the NMI watchdog, we reduce the chance + * of having an NMI watchdog expire while in the SMI handler. + */ + if ( cpu == 0 ) + reason = inb(0x61); + if ( (nmi_watchdog == NMI_NONE) || (!nmi_watchdog_tick(regs) && watchdog_force) ) handle_unknown = 1; @@ -3643,7 +3702,6 @@ /* Only the BSP gets external NMIs from the system. 
*/ if ( cpu == 0 ) { - reason = inb(0x61); if ( reason & 0x80 ) pci_serr_error(regs); if ( reason & 0x40 ) diff -Nru xen-4.9.0/xen/arch/x86/x86_64/asm-offsets.c xen-4.9.2/xen/arch/x86/x86_64/asm-offsets.c --- xen-4.9.0/xen/arch/x86/x86_64/asm-offsets.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_64/asm-offsets.c 2018-03-28 13:10:55.000000000 +0000 @@ -88,6 +88,8 @@ OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl); OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); + OFFSET(VCPU_cr3, struct vcpu, arch.cr3); + OFFSET(VCPU_arch_spec_ctrl, struct vcpu, arch.spec_ctrl); OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask); @@ -137,6 +139,11 @@ OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id); OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); OFFSET(CPUINFO_cr4, struct cpu_info, cr4); + OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); + OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); + OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); + OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); + OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info); DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); BLANK(); diff -Nru xen-4.9.0/xen/arch/x86/x86_64/compat/entry.S xen-4.9.2/xen/arch/x86/x86_64/compat/entry.S --- xen-4.9.0/xen/arch/x86/x86_64/compat/entry.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_64/compat/entry.S 2018-03-28 13:10:55.000000000 +0000 @@ -11,10 +11,17 @@ #include #include + .section .text.entry, "ax", @progbits + ENTRY(entry_int82) ASM_CLAC pushq $0 - SAVE_VOLATILE type=HYPERCALL_VECTOR compat=1 + movl $HYPERCALL_VECTOR, 4(%rsp) + SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + CR4_PV32_RESTORE GET_CURRENT(bx) @@ -58,7 +65,6 @@ /* %rbx: struct vcpu */ compat_process_softirqs: sti - andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp) call do_softirq jmp compat_test_all_events @@ -140,6 +146,12 @@ .popsection or $X86_EFLAGS_IF,%r11 mov %r11d,UREGS_eflags(%rsp) + + mov VCPU_arch_spec_ctrl(%rbx), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + RESTORE_ALL adj=8 compat=1 .Lft0: iretq _ASM_PRE_EXTABLE(.Lft0, handle_exception) @@ -187,7 +199,7 @@ /* See lstar_enter for entry register state. */ ENTRY(cstar_enter) - sti + /* sti could live here when we don't switch page tables below. */ CR4_PV32_RESTORE movq 8(%rsp),%rax /* Restore %rax. */ movq $FLAT_KERNEL_SS,8(%rsp) @@ -195,8 +207,24 @@ pushq $FLAT_USER_CS32 pushq %rcx pushq $0 - SAVE_VOLATILE TRAP_syscall - GET_CURRENT(bx) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ + + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx + jz .Lcstar_cr3_okay + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) + neg %rcx + mov %rcx, %cr3 + movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +.Lcstar_cr3_okay: + sti + + __GET_CURRENT(bx) movq VCPU_domain(%rbx),%rcx cmpb $0,DOMAIN_is_32bit_pv(%rcx) je switch_to_kernel @@ -242,6 +270,9 @@ call compat_create_bounce_frame jmp compat_test_all_events + /* compat_create_bounce_frame & helpers don't need to be in .text.entry */ + .text + /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ /* {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]} */ /* %rdx: trap_bounce, %rbx: struct vcpu */ diff -Nru xen-4.9.0/xen/arch/x86/x86_64/entry.S xen-4.9.2/xen/arch/x86/x86_64/entry.S --- xen-4.9.0/xen/arch/x86/x86_64/entry.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_64/entry.S 2018-03-28 13:10:55.000000000 +0000 @@ -12,6 +12,8 @@ #include #include + .section .text.entry, "ax", @progbits + /* %rbx: struct vcpu */ ENTRY(switch_to_kernel) leaq VCPU_trap_bounce(%rbx),%rdx @@ -35,6 +37,48 @@ /* %rbx: struct vcpu, interrupts disabled */ restore_all_guest: ASSERT_INTERRUPTS_DISABLED + + /* Stash guest SPEC_CTRL value while we can read struct vcpu. */ + mov VCPU_arch_spec_ctrl(%rbx), %r15d + + /* Copy guest mappings and switch to per-CPU root page table. */ + mov VCPU_cr3(%rbx), %r9 + GET_STACK_END(dx) + mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi + movabs $PADDR_MASK & PAGE_MASK, %rsi + movabs $DIRECTMAP_VIRT_START, %rcx + mov %rdi, %rax + and %rsi, %rdi + jz .Lrag_keep_cr3 + and %r9, %rsi + add %rcx, %rdi + add %rcx, %rsi + mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx + mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8 + mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi) + rep movsq + mov $ROOT_PAGETABLE_ENTRIES - \ + ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx + sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ + ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi + sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ + ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi + rep movsq + mov STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi + mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) + mov %rdi, %rsi + and $~X86_CR4_PGE, %rdi + mov %rdi, %cr4 + mov %rax, %cr3 + mov %rsi, %cr4 +.Lrag_keep_cr3: + + /* Restore stashed SPEC_CTRL value. */ + mov %r15d, %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + RESTORE_ALL testw $TRAP_syscall,4(%rsp) jz iret_exit_to_guest @@ -69,6 +113,30 @@ ALIGN /* No special register assumptions. */ restore_all_xen: + /* + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). + */ + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx + mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax + test %rdx, %rdx + /* + * Ideally the condition would be "nsz", but such doesn't exist, + * so "g" will have to do. + */ +UNLIKELY_START(g, exit_cr3) + mov %cr4, %rdi + mov %rdi, %rsi + and $~X86_CR4_PGE, %rdi + mov %rdi, %cr4 + mov %rax, %cr3 + mov %rsi, %cr4 +UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ + RESTORE_ALL adj=8 iretq @@ -89,15 +157,31 @@ * %ss must be saved into the space left by the trampoline. */ ENTRY(lstar_enter) - sti + /* sti could live here when we don't switch page tables below. */ movq 8(%rsp),%rax /* Restore %rax. 
*/ movq $FLAT_KERNEL_SS,8(%rsp) pushq %r11 pushq $FLAT_KERNEL_CS64 pushq %rcx pushq $0 - SAVE_VOLATILE TRAP_syscall - GET_CURRENT(bx) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx + jz .Llstar_cr3_okay + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) + neg %rcx + mov %rcx, %cr3 + movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +.Llstar_cr3_okay: + sti + + __GET_CURRENT(bx) testb $TF_kernel_mode,VCPU_thread_flags(%rbx) jz switch_to_kernel @@ -138,7 +222,6 @@ /* %rbx: struct vcpu */ process_softirqs: sti - SAVE_PRESERVED call do_softirq jmp test_all_events @@ -179,7 +262,7 @@ jmp test_all_events ENTRY(sysenter_entry) - sti + /* sti could live here when we don't switch page tables below. */ pushq $FLAT_USER_SS pushq $0 pushfq @@ -188,8 +271,26 @@ pushq $3 /* ring 3 null cs */ pushq $0 /* null rip */ pushq $0 - SAVE_VOLATILE TRAP_syscall - GET_CURRENT(bx) + movl $TRAP_syscall, 4(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + GET_STACK_END(bx) + /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */ + orl $X86_EFLAGS_IF, UREGS_eflags(%rsp) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx + jz .Lsyse_cr3_okay + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) + neg %rcx + mov %rcx, %cr3 + movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +.Lsyse_cr3_okay: + sti + + __GET_CURRENT(bx) cmpb $0,VCPU_sysenter_disables_events(%rbx) movq VCPU_sysenter_addr(%rbx),%rax setne %cl @@ -205,7 +306,6 @@ leal (,%rcx,TBF_INTERRUPT),%ecx UNLIKELY_START(z, sysenter_gpf) movq VCPU_trap_ctxt(%rbx),%rsi - SAVE_PRESERVED movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) movl %eax,TRAPBOUNCE_error_code(%rdx) movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax @@ -223,7 +323,22 @@ ENTRY(int80_direct_trap) ASM_CLAC pushq $0 - SAVE_VOLATILE 0x80 + movl $0x80, 4(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + GET_STACK_END(bx) + mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx + neg %rcx + jz .Lint80_cr3_okay + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) + neg %rcx + mov %rcx, %cr3 + movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +.Lint80_cr3_okay: + sti cmpb $0,untrusted_msi(%rip) UNLIKELY_START(ne, msi_check) @@ -231,7 +346,7 @@ call check_for_unexpected_msi UNLIKELY_END(msi_check) - GET_CURRENT(bx) + __GET_CURRENT(bx) /* Check that the callback is non-null. */ leaq VCPU_int80_bounce(%rbx),%rdx @@ -251,12 +366,14 @@ * IDT entry with DPL==0. */ movl $((0x80 << 3) | X86_XEC_IDT),UREGS_error_code(%rsp) - SAVE_PRESERVED movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) /* A GPF wouldn't have incremented the instruction pointer. */ subq $2,UREGS_rip(%rsp) jmp handle_exception_saved + /* create_bounce_frame & helpers don't need to be in .text.entry */ + .text + /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */ /* { RCX, R11, [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */ /* %rdx: trap_bounce, %rbx: struct vcpu */ @@ -387,11 +504,35 @@ jmp asm_domain_crash_synchronous /* Does not return */ .popsection + .section .text.entry, "ax", @progbits + ENTRY(common_interrupt) SAVE_ALL CLAC + + GET_STACK_END(14) + + SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! 
`ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx + jz .Lintr_cr3_okay + jns .Lintr_cr3_load + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + neg %rcx +.Lintr_cr3_load: + mov %rcx, %cr3 + xor %ecx, %ecx + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + testb $3, UREGS_cs(%rsp) + cmovnz %rcx, %r15 +.Lintr_cr3_okay: + CR4_PV32_RESTORE movq %rsp,%rdi callq do_IRQ + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) jmp ret_from_intr /* No special register assumptions. */ @@ -409,6 +550,27 @@ /* No special register assumptions. */ GLOBAL(handle_exception) SAVE_ALL CLAC + + GET_STACK_END(14) + + SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx + jz .Lxcpt_cr3_okay + jns .Lxcpt_cr3_load + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + neg %rcx +.Lxcpt_cr3_load: + mov %rcx, %cr3 + xor %ecx, %ecx + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + testb $3, UREGS_cs(%rsp) + cmovnz %rcx, %r15 +.Lxcpt_cr3_okay: + handle_exception_saved: GET_CURRENT(bx) testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) @@ -472,7 +634,9 @@ movzbl UREGS_entry_vector(%rsp),%eax leaq exception_table(%rip),%rdx PERFC_INCR(exceptions, %rax, %rbx) - callq *(%rdx,%rax,8) + mov (%rdx, %rax, 8), %rdx + INDIRECT_CALL %rdx + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) testb $3,UREGS_cs(%rsp) jz restore_all_xen leaq VCPU_trap_bounce(%rbx),%rdx @@ -505,6 +669,7 @@ rep; movsq # make room for ec/ev 1: movq UREGS_error_code(%rsp),%rax # ec/ev movq %rax,UREGS_kernel_sizeof(%rsp) + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) jmp restore_all_xen # return to fixup code /* No special register assumptions. */ @@ -583,6 +748,21 @@ movl $TRAP_double_fault,4(%rsp) /* Set AC to reduce chance of further SMAP faults */ SAVE_ALL STAC + + GET_STACK_END(14) + + SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx + test %rbx, %rbx + jz .Ldblf_cr3_okay + jns .Ldblf_cr3_load + neg %rbx +.Ldblf_cr3_load: + mov %rbx, %cr3 +.Ldblf_cr3_okay: + movq %rsp,%rdi call do_double_fault BUG /* do_double_fault() shouldn't return. */ @@ -601,10 +781,32 @@ movl $TRAP_nmi,4(%rsp) handle_ist_exception: SAVE_ALL CLAC + + GET_STACK_END(14) + + SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + mov %rcx, %r15 + neg %rcx + jz .List_cr3_okay + jns .List_cr3_load + mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + neg %rcx +.List_cr3_load: + mov %rcx, %cr3 + movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) +.List_cr3_okay: + CR4_PV32_RESTORE testb $3,UREGS_cs(%rsp) jz 1f - /* Interrupted guest context. Copy the context to stack bottom. */ + /* + * Interrupted guest context. Clear the restore value for xen_cr3 + * and copy the context to stack bottom. 
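Every entry stanza above decodes the same per-CPU xen_cr3 convention: zero means Xen's page tables are already active, a positive value is a CR3 waiting to be loaded, and a negated value marks a load that an outer (nested) frame has already parked. A simplified C rendering of just the "which CR3 to load" decision; the real stanzas additionally park or clear the slot and stash the old value in %r15 for the exit path:

#include <stdint.h>

/* Returns the CR3 value to write, or 0 for "leave CR3 alone". */
static uint64_t xen_cr3_to_load(int64_t xen_cr3)
{
    if ( xen_cr3 == 0 )
        return 0;                 /* already on Xen's page tables */

    return xen_cr3 > 0 ? (uint64_t)xen_cr3 : (uint64_t)-xen_cr3;
}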
+ */ + xor %r15, %r15 GET_CPUINFO_FIELD(guest_cpu_user_regs,di) movq %rsp,%rsi movl $UREGS_kernel_sizeof/8,%ecx @@ -613,7 +815,9 @@ 1: movq %rsp,%rdi movzbl UREGS_entry_vector(%rsp),%eax leaq exception_table(%rip),%rdx - callq *(%rdx,%rax,8) + mov (%rdx, %rax, 8), %rdx + INDIRECT_CALL %rdx + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) jne ret_from_intr @@ -661,8 +865,7 @@ -.section .rodata, "a", @progbits - + .pushsection .rodata, "a", @progbits ENTRY(exception_table) .quad do_trap .quad do_debug @@ -688,9 +891,10 @@ .quad do_reserved_trap /* Architecturally reserved exceptions. */ .endr .size exception_table, . - exception_table + .popsection /* Table of automatically generated entry points. One per vector. */ - .section .init.rodata, "a", @progbits + .pushsection .init.rodata, "a", @progbits GLOBAL(autogen_entrypoints) /* pop into the .init.rodata section and record an entry point. */ .macro entrypoint ent @@ -699,7 +903,7 @@ .popsection .endm - .text + .popsection autogen_stubs: /* Automatically generated stubs. */ vec = 0 diff -Nru xen-4.9.0/xen/arch/x86/x86_64/mm.c xen-4.9.2/xen/arch/x86/x86_64/mm.c --- xen-4.9.0/xen/arch/x86/x86_64/mm.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_64/mm.c 2018-03-28 13:10:55.000000000 +0000 @@ -1075,7 +1075,7 @@ " jmp 1b \n" ".previous \n" _ASM_EXTABLE(1b, 2b) - : : "r" (base&0xffff) ); + : "+r" (base) ); break; default: diff -Nru xen-4.9.0/xen/arch/x86/x86_64/traps.c xen-4.9.2/xen/arch/x86/x86_64/traps.c --- xen-4.9.0/xen/arch/x86/x86_64/traps.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_64/traps.c 2018-03-28 13:10:55.000000000 +0000 @@ -37,6 +37,22 @@ enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest }; +/* (ab)use crs[5..7] for fs/gs bases. 
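The small x86_64/mm.c change above is an inline-asm constraint fix: the template clobbers the register holding base, so it must be a read-write operand ("+r"), not a plain input; with an input-only constraint the compiler is entitled to reuse the register afterwards as if unchanged. A self-contained illustration of the constraint:

#include <stdio.h>

int main(void)
{
    unsigned long v = 41;

    /* "+r": the register is both read and rewritten by the template. */
    asm volatile ( "inc %0" : "+r" (v) );

    printf("%lu\n", v);   /* prints 42 */
    return 0;
}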
*/ +static void read_registers(struct cpu_user_regs *regs, unsigned long crs[8]) +{ + crs[0] = read_cr0(); + crs[2] = read_cr2(); + crs[3] = read_cr3(); + crs[4] = read_cr4(); + regs->ds = read_sreg(ds); + regs->es = read_sreg(es); + regs->fs = read_sreg(fs); + regs->gs = read_sreg(gs); + crs[5] = rdfsbase(); + crs[6] = rdgsbase(); + rdmsrl(MSR_SHADOW_GS_BASE, crs[7]); +} + static void _show_registers( const struct cpu_user_regs *regs, unsigned long crs[8], enum context context, const struct vcpu *v) @@ -65,16 +81,13 @@ regs->rbp, regs->rsp, regs->r8); printk("r9: %016lx r10: %016lx r11: %016lx\n", regs->r9, regs->r10, regs->r11); - if ( !(regs->entry_vector & TRAP_regs_partial) ) - { - printk("r12: %016lx r13: %016lx r14: %016lx\n", - regs->r12, regs->r13, regs->r14); - printk("r15: %016lx cr0: %016lx cr4: %016lx\n", - regs->r15, crs[0], crs[4]); - } - else - printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]); + printk("r12: %016lx r13: %016lx r14: %016lx\n", + regs->r12, regs->r13, regs->r14); + printk("r15: %016lx cr0: %016lx cr4: %016lx\n", + regs->r15, crs[0], crs[4]); printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]); + printk("fsb: %016lx gsb: %016lx gss: %016lx\n", + crs[5], crs[6], crs[7]); printk("ds: %04x es: %04x fs: %04x gs: %04x " "ss: %04x cs: %04x\n", regs->ds, regs->es, regs->fs, @@ -104,13 +117,18 @@ fault_regs.es = sreg.sel; hvm_get_segment_register(v, x86_seg_fs, &sreg); fault_regs.fs = sreg.sel; + fault_crs[5] = sreg.base; hvm_get_segment_register(v, x86_seg_gs, &sreg); fault_regs.gs = sreg.sel; + fault_crs[6] = sreg.base; hvm_get_segment_register(v, x86_seg_ss, &sreg); fault_regs.ss = sreg.sel; + fault_crs[7] = hvm_get_shadow_gs_base(v); } else { + read_registers(&fault_regs, fault_crs); + if ( guest_mode(regs) ) { context = CTXT_pv_guest; @@ -121,14 +139,6 @@ context = CTXT_hypervisor; fault_crs[2] = read_cr2(); } - - fault_crs[0] = read_cr0(); - fault_crs[3] = read_cr3(); - fault_crs[4] = read_cr4(); - fault_regs.ds = read_sreg(ds); - fault_regs.es = read_sreg(es); - fault_regs.fs = read_sreg(fs); - fault_regs.gs = read_sreg(gs); } print_xen_info(); @@ -147,6 +157,7 @@ void vcpu_show_registers(const struct vcpu *v) { const struct cpu_user_regs *regs = &v->arch.user_regs; + bool kernel = guest_kernel_mode(v, regs); unsigned long crs[8]; /* Only handle PV guests for now */ @@ -155,10 +166,13 @@ crs[0] = v->arch.pv_vcpu.ctrlreg[0]; crs[2] = arch_get_cr2(v); - crs[3] = pagetable_get_paddr(guest_kernel_mode(v, regs) ? + crs[3] = pagetable_get_paddr(kernel ? v->arch.guest_table : v->arch.guest_table_user); crs[4] = v->arch.pv_vcpu.ctrlreg[4]; + crs[5] = v->arch.pv_vcpu.fs_base; + crs[6 + !kernel] = v->arch.pv_vcpu.gs_base_kernel; + crs[7 - !kernel] = v->arch.pv_vcpu.gs_base_user; _show_registers(regs, crs, CTXT_pv_guest, v); } @@ -238,14 +252,7 @@ printk("*** DOUBLE FAULT ***\n"); print_xen_info(); - crs[0] = read_cr0(); - crs[2] = read_cr2(); - crs[3] = read_cr3(); - crs[4] = read_cr4(); - regs->ds = read_sreg(ds); - regs->es = read_sreg(es); - regs->fs = read_sreg(fs); - regs->gs = read_sreg(gs); + read_registers(regs, crs); printk("CPU: %d\n", cpu); _show_registers(regs, crs, CTXT_hypervisor, NULL); @@ -265,8 +272,17 @@ else v->arch.pv_vcpu.gs_base_user = __rdgsbase(); } - v->arch.flags ^= TF_kernel_mode; asm volatile ( "swapgs" ); + + toggle_guest_pt(v); +} + +void toggle_guest_pt(struct vcpu *v) +{ + if ( is_pv_32bit_vcpu(v) ) + return; + + v->arch.flags ^= TF_kernel_mode; update_cr3(v); /* Don't flush user global mappings from the TLB. Don't tick TLB clock. 
*/ asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" ); diff -Nru xen-4.9.0/xen/arch/x86/x86_emulate/x86_emulate.c xen-4.9.2/xen/arch/x86/x86_emulate/x86_emulate.c --- xen-4.9.0/xen/arch/x86/x86_emulate/x86_emulate.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_emulate/x86_emulate.c 2018-03-28 13:10:55.000000000 +0000 @@ -828,7 +828,7 @@ #ifdef __XEN__ # define invoke_stub(pre, post, constraints...) do { \ union stub_exception_token res_ = { .raw = ~0 }; \ - asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n" \ + asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \ ".Lret%=:\n\t" \ ".pushsection .fixup,\"ax\"\n" \ ".Lfix%=:\n\t" \ @@ -837,7 +837,7 @@ ".popsection\n\t" \ _ASM_EXTABLE(.Lret%=, .Lfix%=) \ : [exn] "+g" (res_), constraints, \ - [stub] "rm" (stub.func), \ + [stub] "r" (stub.func), \ "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) ); \ if ( unlikely(~res_.raw) ) \ { \ @@ -1249,10 +1249,10 @@ /* Clip maximum repetitions so that the index register at most just wraps. */ #define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \ - unsigned long todo__, ea__ = truncate_word(ea, ad_bytes); \ + unsigned long todo__, ea__ = truncate_ea(ea); \ if ( !(_regs.eflags & X86_EFLAGS_DF) ) \ - todo__ = truncate_word(-(ea), ad_bytes) / (bytes_per_rep); \ - else if ( truncate_word((ea) + (bytes_per_rep) - 1, ad_bytes) < ea__ )\ + todo__ = truncate_ea(-ea__) / (bytes_per_rep); \ + else if ( truncate_ea(ea__ + (bytes_per_rep) - 1) < ea__ ) \ todo__ = 1; \ else \ todo__ = ea__ / (bytes_per_rep) + 1; \ @@ -1937,10 +1937,10 @@ case 9: p = ®s->r9; break; case 10: p = ®s->r10; break; case 11: p = ®s->r11; break; - case 12: mark_regs_dirty(regs); p = ®s->r12; break; - case 13: mark_regs_dirty(regs); p = ®s->r13; break; - case 14: mark_regs_dirty(regs); p = ®s->r14; break; - case 15: mark_regs_dirty(regs); p = ®s->r15; break; + case 12: p = ®s->r12; break; + case 13: p = ®s->r13; break; + case 14: p = ®s->r14; break; + case 15: p = ®s->r15; break; #endif default: BUG(); p = NULL; break; } @@ -2444,6 +2444,10 @@ } done_prefixes: + /* %{e,c,s,d}s overrides are ignored in 64bit mode. */ + if ( mode_64bit() && override_seg < x86_seg_fs ) + override_seg = x86_seg_none; + if ( rex_prefix & REX_W ) op_bytes = 8; @@ -3128,6 +3132,7 @@ op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L)); else ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L); + ea.mem.off = truncate_ea(ea.mem.off); } /* Bit index always truncated to within range. */ @@ -3346,7 +3351,7 @@ unsigned long src_val2; int lb, ub, idx; generate_exception_if(src.type != OP_MEM, EXC_UD); - if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes, + if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + op_bytes), &src_val2, op_bytes, ctxt, ops)) ) goto done; ub = (op_bytes == 2) ? 
(int16_t)src_val2 : (int32_t)src_val2; @@ -3896,7 +3901,7 @@ seg = (b & 1) * 3; /* es = 0, ds = 3 */ les: generate_exception_if(src.type != OP_MEM, EXC_UD); - if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes, + if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + src.bytes), &dst.val, 2, ctxt, ops)) != X86EMUL_OKAY ) goto done; ASSERT(is_x86_user_segment(seg)); @@ -4930,7 +4935,8 @@ case 5: /* jmp (far, absolute indirect) */ generate_exception_if(src.type != OP_MEM, EXC_UD); - if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes, + if ( (rc = read_ulong(src.mem.seg, + truncate_ea(src.mem.off + op_bytes), &imm2, 2, ctxt, ops)) ) goto done; imm1 = src.val; @@ -4986,9 +4992,12 @@ } break; - case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ { + case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ + { unsigned long base, limit, cr0, cr0w; + seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr; + switch( modrm ) { case 0xca: /* clac */ @@ -4999,7 +5008,7 @@ _regs.eflags &= ~X86_EFLAGS_AC; if ( modrm == 0xcb ) _regs.eflags |= X86_EFLAGS_AC; - goto complete_insn; + break; #ifdef __XEN__ case 0xd1: /* xsetbv */ @@ -5011,7 +5020,7 @@ handle_xsetbv(_regs.ecx, _regs.eax | (_regs.rdx << 32)), EXC_GP, 0); - goto complete_insn; + break; #endif case 0xd4: /* vmfunc */ @@ -5019,7 +5028,7 @@ fail_if(!ops->vmfunc); if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY ) goto done; - goto complete_insn; + break; case 0xd5: /* xend */ generate_exception_if(vex.pfx, EXC_UD); @@ -5033,16 +5042,23 @@ EXC_UD); /* Neither HLE nor RTM can be active when we get here. */ _regs.eflags |= X86_EFLAGS_ZF; - goto complete_insn; + break; case 0xdf: /* invlpga */ - generate_exception_if(!in_protmode(ctxt, ops), EXC_UD); + fail_if(!ops->read_msr); + if ( (rc = ops->read_msr(MSR_EFER, + &msr_val, ctxt)) != X86EMUL_OKAY ) + goto done; + /* Finding SVME set implies vcpu_has_svm(). */ + generate_exception_if(!(msr_val & EFER_SVME) || + !in_protmode(ctxt, ops), EXC_UD); generate_exception_if(!mode_ring0(), EXC_GP, 0); + generate_exception_if(_regs.ecx, EXC_UD); /* TODO: Support ASIDs. */ fail_if(ops->invlpg == NULL); if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.r(ax)), ctxt)) ) goto done; - goto complete_insn; + break; case 0xf9: /* rdtscp */ fail_if(ops->read_msr == NULL); @@ -5090,17 +5106,17 @@ base += sizeof(zero); limit -= sizeof(zero); } - goto complete_insn; - } + break; } - seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr; +#define _GRP7(mod, reg) \ + (((mod) << 6) | ((reg) << 3)) ... 
(((mod) << 6) | ((reg) << 3) | 7) +#define GRP7_MEM(reg) _GRP7(0, reg): case _GRP7(1, reg): case _GRP7(2, reg) +#define GRP7_ALL(reg) GRP7_MEM(reg): case _GRP7(3, reg) - switch ( modrm_reg & 7 ) - { - case 0: /* sgdt */ - case 1: /* sidt */ - generate_exception_if(ea.type != OP_MEM, EXC_UD); + case GRP7_MEM(0): /* sgdt */ + case GRP7_MEM(1): /* sidt */ + ASSERT(ea.type == OP_MEM); generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0); fail_if(!ops->read_segment || !ops->write); if ( (rc = ops->read_segment(seg, &sreg, ctxt)) ) @@ -5114,19 +5130,20 @@ } if ( (rc = ops->write(ea.mem.seg, ea.mem.off, &sreg.limit, 2, ctxt)) != X86EMUL_OKAY || - (rc = ops->write(ea.mem.seg, ea.mem.off + 2, &sreg.base, - op_bytes, ctxt)) != X86EMUL_OKAY ) + (rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + 2), + &sreg.base, op_bytes, ctxt)) != X86EMUL_OKAY ) goto done; break; - case 2: /* lgdt */ - case 3: /* lidt */ + + case GRP7_MEM(2): /* lgdt */ + case GRP7_MEM(3): /* lidt */ + ASSERT(ea.type == OP_MEM); generate_exception_if(!mode_ring0(), EXC_GP, 0); - generate_exception_if(ea.type != OP_MEM, EXC_UD); fail_if(ops->write_segment == NULL); memset(&sreg, 0, sizeof(sreg)); - if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0, + if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &limit, 2, ctxt, ops)) || - (rc = read_ulong(ea.mem.seg, ea.mem.off+2, + (rc = read_ulong(ea.mem.seg, truncate_ea(ea.mem.off + 2), &base, mode_64bit() ? 8 : 4, ctxt, ops)) ) goto done; generate_exception_if(!is_canonical_address(base), EXC_GP, 0); @@ -5137,7 +5154,8 @@ if ( (rc = ops->write_segment(seg, &sreg, ctxt)) ) goto done; break; - case 4: /* smsw */ + + case GRP7_ALL(4): /* smsw */ generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0); if ( ea.type == OP_MEM ) { @@ -5152,7 +5170,8 @@ if ( (rc = ops->read_cr(0, &dst.val, ctxt)) ) goto done; break; - case 6: /* lmsw */ + + case GRP7_ALL(6): /* lmsw */ fail_if(ops->read_cr == NULL); fail_if(ops->write_cr == NULL); generate_exception_if(!mode_ring0(), EXC_GP, 0); @@ -5168,13 +5187,19 @@ if ( (rc = ops->write_cr(0, cr0, ctxt)) ) goto done; break; - case 7: /* invlpg */ + + case GRP7_MEM(7): /* invlpg */ + ASSERT(ea.type == OP_MEM); generate_exception_if(!mode_ring0(), EXC_GP, 0); - generate_exception_if(ea.type != OP_MEM, EXC_UD); fail_if(ops->invlpg == NULL); if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) ) goto done; break; + +#undef GRP7_ALL +#undef GRP7_MEM +#undef _GRP7 + default: goto cannot_emulate; } @@ -5597,9 +5622,8 @@ } else { - if ( ctxt->vendor == X86_VENDOR_AMD ) - vex.l = 0; - generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD); + generate_exception_if(vex.reg != 0xf, EXC_UD); + vex.l = 0; host_and_vcpu_must_have(avx); get_fpu(X86EMUL_FPU_ymm, &fic); } @@ -6732,10 +6756,9 @@ ea.type = OP_MEM; goto simd_0f_int_imm8; + CASE_SIMD_PACKED_INT(0x0f, 0xc5): /* pextrw $imm8,{,x}mm,reg */ case X86EMUL_OPC_VEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ generate_exception_if(vex.l, EXC_UD); - /* fall through */ - CASE_SIMD_PACKED_INT(0x0f, 0xc5): /* pextrw $imm8,{,x}mm,reg */ opc = init_prefixes(stub); opc[0] = b; /* Convert GPR destination to %rAX. 
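The GRP7_MEM()/GRP7_ALL() macros above rely on GCC case ranges (a GNU extension) over the whole modrm byte, mod<<6 | reg<<3 | rm. A self-contained sketch of the same dispatch idea, with only two hypothetical cases wired up:

#include <stdio.h>

/* A case range per (mod, reg) pair covers all eight rm encodings at
 * once; mod = 3 is the register form, mod = 0..2 the memory forms. */
#define _G7(mod, reg) (((mod) << 6) | ((reg) << 3)) ... (((mod) << 6) | ((reg) << 3) | 7)
#define G7_MEM(reg)   _G7(0, reg): case _G7(1, reg): case _G7(2, reg)
#define G7_ALL(reg)   G7_MEM(reg): case _G7(3, reg)

static const char *decode(unsigned int modrm)
{
    switch ( modrm )
    {
    case G7_MEM(0): return "sgdt (memory forms only)";
    case G7_ALL(4): return "smsw (memory or register)";
    default:        return "other";
    }
}

int main(void)
{
    printf("%s\n", decode(0x05)); /* mod=0 reg=0 -> sgdt  */
    printf("%s\n", decode(0xe0)); /* mod=3 reg=4 -> smsw  */
    printf("%s\n", decode(0xc0)); /* mod=3 reg=0 -> other */
    return 0;
}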
*/ @@ -7514,6 +7537,8 @@ case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */ generate_exception_if(vex.l, EXC_UD); + if ( !mode_64bit() ) + vex.w = 0; memcpy(mmvalp, &src.val, op_bytes); ea.type = OP_MEM; op_bytes = src.bytes; diff -Nru xen-4.9.0/xen/arch/x86/x86_emulate.c xen-4.9.2/xen/arch/x86/x86_emulate.c --- xen-4.9.0/xen/arch/x86/x86_emulate.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/x86_emulate.c 2018-03-28 13:10:55.000000000 +0000 @@ -11,7 +11,6 @@ #include #include -#include /* mark_regs_dirty() */ #include /* current_cpu_info */ #include #include /* cpu_has_amd_erratum() */ diff -Nru xen-4.9.0/xen/arch/x86/xen.lds.S xen-4.9.2/xen/arch/x86/xen.lds.S --- xen-4.9.0/xen/arch/x86/xen.lds.S 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/arch/x86/xen.lds.S 2018-03-28 13:10:55.000000000 +0000 @@ -59,6 +59,14 @@ .text : { _stext = .; /* Text and read-only data */ *(.text) + *(.text.__x86_indirect_thunk_*) + + . = ALIGN(PAGE_SIZE); + _stextentry = .; + *(.text.entry) + . = ALIGN(PAGE_SIZE); + _etextentry = .; + *(.text.cold) *(.text.unlikely) *(.fixup) diff -Nru xen-4.9.0/xen/common/compat/grant_table.c xen-4.9.2/xen/common/compat/grant_table.c --- xen-4.9.0/xen/common/compat/grant_table.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/compat/grant_table.c 2018-03-28 13:10:55.000000000 +0000 @@ -258,9 +258,9 @@ rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n); if ( rc > 0 ) { - ASSERT(rc < n); - i -= n - rc; - n = rc; + ASSERT(rc <= n); + i -= rc; + n -= rc; } if ( rc >= 0 ) { diff -Nru xen-4.9.0/xen/common/domain.c xen-4.9.2/xen/common/domain.c --- xen-4.9.0/xen/common/domain.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/domain.c 2018-03-28 13:10:55.000000000 +0000 @@ -795,6 +795,14 @@ struct vcpu *v; int i; + /* + * Flush all state for the vCPU previously having run on the current CPU. + * This is in particular relevant for x86 HVM ones on VMX, so that this + * flushing of state won't happen from the TLB flush IPI handler behind + * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section. + */ + sync_local_execstate(); + for ( i = d->max_vcpus - 1; i >= 0; i-- ) { if ( (v = d->vcpu[i]) == NULL ) diff -Nru xen-4.9.0/xen/common/domctl.c xen-4.9.2/xen/common/domctl.c --- xen-4.9.0/xen/common/domctl.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/domctl.c 2018-03-28 13:10:55.000000000 +0000 @@ -864,7 +864,7 @@ < sizeof(struct compat_vcpu_guest_context)); #endif ret = -ENOMEM; - if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL ) + if ( (c.nat = xzalloc(struct vcpu_guest_context)) == NULL ) goto getvcpucontext_out; vcpu_pause(v); diff -Nru xen-4.9.0/xen/common/grant_table.c xen-4.9.2/xen/common/grant_table.c --- xen-4.9.0/xen/common/grant_table.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/grant_table.c 2018-03-28 13:10:55.000000000 +0000 @@ -304,11 +304,16 @@ { unsigned int head, next, prev_head; + spin_lock(&v->maptrack_freelist_lock); + do { /* No maptrack pages allocated for this VCPU yet? 
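The domctl.c hunk above (and the livepatch.c ones further down) switch xmalloc-family allocations to their zeroing variants. The motivation is data-leak hygiene when a partially initialised structure is later copied to a guest; a hedged illustration in plain C, with calloc() standing in for xzalloc():

#include <stdlib.h>

struct vcpu_ctx {
    unsigned long filled_by_arch;   /* always written */
    unsigned long optional[15];     /* only written on some paths */
};

/* With zeroed allocation every byte is defined even when a code path
 * skips a field, so copying the whole structure back to a guest
 * cannot disclose stale hypervisor heap contents. */
static struct vcpu_ctx *get_ctx(void)
{
    return calloc(1, sizeof(struct vcpu_ctx));
}

int main(void)
{
    struct vcpu_ctx *c = get_ctx();

    if ( c )
    {
        c->filled_by_arch = 42;  /* partial initialisation...          */
        free(c);                 /* ...yet *c could be copied out whole */
    }
    return 0;
}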
*/ head = read_atomic(&v->maptrack_head); if ( unlikely(head == MAPTRACK_TAIL) ) + { + spin_unlock(&v->maptrack_freelist_lock); return -1; + } /* * Always keep one entry in the free list to make it easier to @@ -316,12 +321,17 @@ */ next = read_atomic(&maptrack_entry(t, head).ref); if ( unlikely(next == MAPTRACK_TAIL) ) + { + spin_unlock(&v->maptrack_freelist_lock); return -1; + } prev_head = head; head = cmpxchg(&v->maptrack_head, prev_head, next); } while ( head != prev_head ); + spin_unlock(&v->maptrack_freelist_lock); + return head; } @@ -380,6 +390,8 @@ /* 2. Add entry to the tail of the list on the original VCPU. */ v = currd->vcpu[maptrack_entry(t, handle).vcpu]; + spin_lock(&v->maptrack_freelist_lock); + cur_tail = read_atomic(&v->maptrack_tail); do { prev_tail = cur_tail; @@ -388,6 +400,8 @@ /* 3. Update the old tail entry to point to the new entry. */ write_atomic(&maptrack_entry(t, prev_tail).ref, handle); + + spin_unlock(&v->maptrack_freelist_lock); } static inline int @@ -397,7 +411,7 @@ struct vcpu *curr = current; unsigned int i, head; grant_handle_t handle; - struct grant_mapping *new_mt; + struct grant_mapping *new_mt = NULL; handle = __get_maptrack_handle(lgt, curr); if ( likely(handle != -1) ) @@ -406,15 +420,16 @@ spin_lock(&lgt->maptrack_lock); /* - * If we've run out of frames, try stealing an entry from another - * VCPU (in case the guest isn't mapping across its VCPUs evenly). + * If we've run out of handles and still have frame headroom, try + * allocating a new maptrack frame. If there is no headroom, or we're + * out of memory, try stealing an entry from another VCPU (in case the + * guest isn't mapping across its VCPUs evenly). */ - if ( nr_maptrack_frames(lgt) >= max_maptrack_frames ) + if ( nr_maptrack_frames(lgt) < max_maptrack_frames ) + new_mt = alloc_xenheap_page(); + + if ( !new_mt ) { - /* - * Can drop the lock since no other VCPU can be adding a new - * frame once they've run out. 
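The maptrack hunks above put the per-vCPU free list under the new maptrack_freelist_lock instead of relying on lock-free cmpxchg loops alone, closing the race with handle stealing. A minimal sketch of the resulting pop-under-lock pattern, with a pthread mutex standing in for Xen's spinlock and illustrative names throughout:

#include <pthread.h>

#define TAIL (~0u)

struct freelist {
    pthread_mutex_t lock;   /* stands in for maptrack_freelist_lock */
    unsigned int head;
    unsigned int next[64];  /* next[i]: entry following handle i */
};

/* Pop one handle, or TAIL if the list is empty or down to its last
 * entry (one entry is always kept, as in __get_maptrack_handle()). */
static unsigned int fl_pop(struct freelist *fl)
{
    unsigned int head = TAIL;

    pthread_mutex_lock(&fl->lock);
    if ( fl->head != TAIL && fl->next[fl->head] != TAIL )
    {
        head = fl->head;
        fl->head = fl->next[head];
    }
    pthread_mutex_unlock(&fl->lock);

    return head;
}

int main(void)
{
    struct freelist fl = { PTHREAD_MUTEX_INITIALIZER, TAIL, { 0 } };

    return fl_pop(&fl) == TAIL ? 0 : 1;   /* empty list: no handle */
}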
- */ spin_unlock(&lgt->maptrack_lock); /* @@ -426,18 +441,16 @@ handle = steal_maptrack_handle(lgt, curr); if ( handle == -1 ) return -1; + spin_lock(&curr->maptrack_freelist_lock); + maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL; curr->maptrack_tail = handle; - write_atomic(&curr->maptrack_head, handle); + if ( curr->maptrack_head == MAPTRACK_TAIL ) + write_atomic(&curr->maptrack_head, handle); + spin_unlock(&curr->maptrack_freelist_lock); } return steal_maptrack_handle(lgt, curr); } - new_mt = alloc_xenheap_page(); - if ( !new_mt ) - { - spin_unlock(&lgt->maptrack_lock); - return -1; - } clear_page(new_mt); /* @@ -460,12 +473,15 @@ smp_wmb(); lgt->maptrack_limit += MAPTRACK_PER_PAGE; + spin_unlock(&lgt->maptrack_lock); + spin_lock(&curr->maptrack_freelist_lock); + do { new_mt[i - 1].ref = read_atomic(&curr->maptrack_head); head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1); } while ( head != new_mt[i - 1].ref ); - spin_unlock(&lgt->maptrack_lock); + spin_unlock(&curr->maptrack_freelist_lock); return handle; } @@ -664,10 +680,10 @@ return _set_status_v2(domid, readonly, mapflag, shah, act, status); } -static int grant_map_exists(const struct domain *ld, - struct grant_table *rgt, - unsigned long mfn, - unsigned int *ref_count) +static struct active_grant_entry *grant_map_exists(const struct domain *ld, + struct grant_table *rgt, + unsigned long mfn, + unsigned int *ref_count) { unsigned int ref, max_iter; @@ -683,28 +699,20 @@ nr_grant_entries(rgt)); for ( ref = *ref_count; ref < max_iter; ref++ ) { - struct active_grant_entry *act; - bool_t exists; - - act = active_entry_acquire(rgt, ref); - - exists = act->pin - && act->domid == ld->domain_id - && act->frame == mfn; + struct active_grant_entry *act = active_entry_acquire(rgt, ref); + if ( act->pin && act->domid == ld->domain_id && act->frame == mfn ) + return act; active_entry_release(act); - - if ( exists ) - return 0; } if ( ref < nr_grant_entries(rgt) ) { *ref_count = ref; - return 1; + return NULL; } - return -EINVAL; + return ERR_PTR(-EINVAL); } #define MAPKIND_READ 1 @@ -1508,23 +1516,74 @@ return -ENOMEM; } -static void +static int gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) { - int i; + unsigned int i; for ( i = 0; i < nr_status_frames(gt); i++ ) { struct page_info *pg = virt_to_page(gt->status[i]); + gfn_t gfn = gnttab_get_frame_gfn(d, true, i); + + /* + * For translated domains, recovering from failure after partial + * changes were made is more complicated than it seems worth + * implementing at this time. Hence respective error paths below + * crash the domain in such a case. + */ + if ( paging_mode_translate(d) ) + { + int rc = gfn_eq(gfn, INVALID_GFN) + ? 
0 + : guest_physmap_remove_page(d, gfn, + _mfn(page_to_mfn(pg)), 0); + + if ( rc ) + { + gprintk(XENLOG_ERR, + "Could not remove status frame %u (GFN %#lx) from P2M\n", + i, gfn_x(gfn)); + domain_crash(d); + return rc; + } + gnttab_set_frame_gfn(d, true, i, INVALID_GFN); + } BUG_ON(page_get_owner(pg) != d); if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) ) put_page(pg); - BUG_ON(pg->count_info & ~PGC_xen_heap); + + if ( pg->count_info & ~PGC_xen_heap ) + { + if ( paging_mode_translate(d) ) + { + gprintk(XENLOG_ERR, + "Wrong page state %#lx of status frame %u (GFN %#lx)\n", + pg->count_info, i, gfn_x(gfn)); + domain_crash(d); + } + else + { + if ( get_page(pg, d) ) + set_bit(_PGC_allocated, &pg->count_info); + while ( i-- ) + gnttab_create_status_page(d, gt, i); + } + return -EBUSY; + } + + page_set_owner(pg, NULL); + } + + for ( i = 0; i < nr_status_frames(gt); i++ ) + { free_xenheap_page(gt->status[i]); gt->status[i] = NULL; } gt->nr_status_frames = 0; + + return 0; } /* @@ -1598,7 +1657,7 @@ XEN_GUEST_HANDLE_PARAM(gnttab_setup_table_t) uop, unsigned int count) { struct gnttab_setup_table op; - struct domain *d; + struct domain *d = NULL; struct grant_table *gt; int i; xen_pfn_t gmfn; @@ -1618,7 +1677,7 @@ " per domain.\n", max_grant_frames); op.status = GNTST_general_error; - goto out1; + goto out; } if ( !guest_handle_okay(op.frame_list, op.nr_frames) ) @@ -1629,13 +1688,13 @@ { gdprintk(XENLOG_INFO, "Bad domid %d.\n", op.dom); op.status = GNTST_bad_domain; - goto out2; + goto out; } if ( xsm_grant_setup(XSM_TARGET, current->domain, d) ) { op.status = GNTST_permission_denied; - goto out2; + goto out; } gt = d->grant_table; @@ -1653,7 +1712,7 @@ "Expand grant table to %u failed. Current: %u Max: %u\n", op.nr_frames, nr_grant_frames(gt), max_grant_frames); op.status = GNTST_general_error; - goto out3; + goto unlock; } op.status = GNTST_okay; @@ -1666,11 +1725,12 @@ op.status = GNTST_bad_virt_addr; } - out3: + unlock: grant_write_unlock(gt); - out2: - rcu_unlock_domain(d); - out1: + out: + if ( d ) + rcu_unlock_domain(d); + if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) return -EFAULT; @@ -2033,13 +2093,8 @@ unsigned long r_frame; uint16_t *status; grant_ref_t trans_gref; - int released_read; - int released_write; struct domain *td; - released_read = 0; - released_write = 0; - grant_read_lock(rgt); act = active_entry_acquire(rgt, gref); @@ -2069,29 +2124,22 @@ act->pin -= GNTPIN_hstw_inc; if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) ) - { - released_write = 1; gnttab_clear_flag(_GTF_writing, status); - } } if ( !act->pin ) - { gnttab_clear_flag(_GTF_reading, status); - released_read = 1; - } active_entry_release(act); grant_read_unlock(rgt); if ( td != rd ) { - /* Recursive calls, but they're tail calls, so it's - okay. */ - if ( released_write ) - __release_grant_for_copy(td, trans_gref, 0); - else if ( released_read ) - __release_grant_for_copy(td, trans_gref, 1); + /* + * Recursive call, but it is bounded (acquire permits only a single + * level of transitivity), so it's okay. 
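The comment above encodes the fix for the previously unbounded tail calls: __release_grant_for_copy() can recurse at most once, because __acquire_grant_for_copy() is always invoked with allow_transitive = 0 for the nested grant. A toy model of that depth bound, using hypothetical helpers rather than the Xen API:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct grant { bool transitive; struct grant *underlying; };

/* Releasing a transitive grant releases the grant it points at, but
 * that nested grant was acquired with transitivity disallowed, so it
 * can never itself be transitive: recursion depth is at most 1. */
static void release_for_copy(struct grant *g, unsigned int depth)
{
    assert(depth <= 1);
    if ( g->transitive )
        release_for_copy(g->underlying, depth + 1);
}

int main(void)
{
    struct grant plain = { false, NULL };
    struct grant trans = { true, &plain };

    release_for_copy(&trans, 0);
    return 0;
}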
+ */ + __release_grant_for_copy(td, trans_gref, readonly); rcu_unlock_domain(td); } @@ -2105,10 +2153,10 @@ static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, uint16_t *status) { - if ( !(act->pin & GNTPIN_hstw_mask) ) + if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) gnttab_clear_flag(_GTF_writing, status); - if ( !(act->pin & GNTPIN_hstr_mask) ) + if ( !act->pin ) gnttab_clear_flag(_GTF_reading, status); } @@ -2165,8 +2213,108 @@ act->domid, ldom, act->pin); old_pin = act->pin; - if ( !act->pin || - (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) + if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) + { + if ( (!old_pin || (!readonly && + !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) && + (rc = _set_status_v2(ldom, readonly, 0, shah, act, + status)) != GNTST_okay ) + goto unlock_out; + + if ( !allow_transitive ) + PIN_FAIL(unlock_out_clear, GNTST_general_error, + "transitive grant when transitivity not allowed\n"); + + trans_domid = sha2->transitive.trans_domid; + trans_gref = sha2->transitive.gref; + barrier(); /* Stop the compiler from re-loading + trans_domid from shared memory */ + if ( trans_domid == rd->domain_id ) + PIN_FAIL(unlock_out_clear, GNTST_general_error, + "transitive grants cannot be self-referential\n"); + + /* + * We allow the trans_domid == ldom case, which corresponds to a + * grant being issued by one domain, sent to another one, and then + * transitively granted back to the original domain. Allowing it + * is easy, and means that you don't need to go out of your way to + * avoid it in the guest. + */ + + /* We need to leave the rrd locked during the grant copy. */ + td = rcu_lock_domain_by_id(trans_domid); + if ( td == NULL ) + PIN_FAIL(unlock_out_clear, GNTST_general_error, + "transitive grant referenced bad domain %d\n", + trans_domid); + + /* + * __acquire_grant_for_copy() could take the lock on the + * remote table (if rd == td), so we have to drop the lock + * here and reacquire. + */ + active_entry_release(act); + grant_read_unlock(rgt); + + rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, + readonly, &grant_frame, page, + &trans_page_off, &trans_length, 0); + + grant_read_lock(rgt); + act = active_entry_acquire(rgt, gref); + + if ( rc != GNTST_okay ) + { + __fixup_status_for_copy_pin(act, status); + rcu_unlock_domain(td); + active_entry_release(act); + grant_read_unlock(rgt); + return rc; + } + + /* + * We dropped the lock, so we have to check that the grant didn't + * change, and that nobody else tried to pin/unpin it. If anything + * changed, just give up and tell the caller to retry. + */ + if ( rgt->gt_version != 2 || + act->pin != old_pin || + (old_pin && (act->domid != ldom || act->frame != grant_frame || + act->start != trans_page_off || + act->length != trans_length || + act->trans_domain != td || + act->trans_gref != trans_gref || + !act->is_sub_page)) ) + { + __release_grant_for_copy(td, trans_gref, readonly); + __fixup_status_for_copy_pin(act, status); + rcu_unlock_domain(td); + active_entry_release(act); + grant_read_unlock(rgt); + put_page(*page); + *page = NULL; + return ERESTART; + } + + if ( !old_pin ) + { + act->domid = ldom; + act->start = trans_page_off; + act->length = trans_length; + act->trans_domain = td; + act->trans_gref = trans_gref; + act->frame = grant_frame; + act->gfn = -1ul; + /* + * The actual remote remote grant may or may not be a sub-page, + * but we always treat it as one because that blocks mappings of + * transitive grants. 
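Note the behavioural change buried in the hunk above: where the old code retried a lost race by re-invoking __acquire_grant_for_copy() itself, the rework undoes its work and returns ERESTART, letting the hypercall continuation machinery retry, so hypervisor stack depth stays bounded. Sketched control flow (illustrative constants and signatures, not the real ones):

#define OKAY      0
#define ERESTART  (-1)   /* illustrative value only */

/* Old shape: retry by self-call (stack grows on repeated races).
 * New shape: report ERESTART; the caller re-issues the operation. */
static int acquire(int *races_left)
{
    if ( (*races_left)-- > 0 )
        return ERESTART;      /* was: return acquire(races_left); */
    return OKAY;
}

int main(void)
{
    int races = 2;

    while ( acquire(&races) == ERESTART )
        ;   /* continuation: re-enter with identical arguments */
    return 0;
}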
+ */ + act->is_sub_page = 1; + } + } + else if ( !old_pin || + (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) { if ( (rc = _set_status(rgt->gt_version, ldom, readonly, 0, shah, act, @@ -2187,79 +2335,6 @@ trans_page_off = 0; trans_length = PAGE_SIZE; } - else if ( (shah->flags & GTF_type_mask) == GTF_transitive ) - { - if ( !allow_transitive ) - PIN_FAIL(unlock_out_clear, GNTST_general_error, - "transitive grant when transitivity not allowed\n"); - - trans_domid = sha2->transitive.trans_domid; - trans_gref = sha2->transitive.gref; - barrier(); /* Stop the compiler from re-loading - trans_domid from shared memory */ - if ( trans_domid == rd->domain_id ) - PIN_FAIL(unlock_out_clear, GNTST_general_error, - "transitive grants cannot be self-referential\n"); - - /* We allow the trans_domid == ldom case, which - corresponds to a grant being issued by one domain, sent - to another one, and then transitively granted back to - the original domain. Allowing it is easy, and means - that you don't need to go out of your way to avoid it - in the guest. */ - - /* We need to leave the rrd locked during the grant copy */ - td = rcu_lock_domain_by_id(trans_domid); - if ( td == NULL ) - PIN_FAIL(unlock_out_clear, GNTST_general_error, - "transitive grant referenced bad domain %d\n", - trans_domid); - - /* - * __acquire_grant_for_copy() could take the lock on the - * remote table (if rd == td), so we have to drop the lock - * here and reacquire - */ - active_entry_release(act); - grant_read_unlock(rgt); - - rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, - readonly, &grant_frame, page, - &trans_page_off, &trans_length, 0); - - grant_read_lock(rgt); - act = active_entry_acquire(rgt, gref); - - if ( rc != GNTST_okay ) { - __fixup_status_for_copy_pin(act, status); - rcu_unlock_domain(td); - active_entry_release(act); - grant_read_unlock(rgt); - return rc; - } - - /* We dropped the lock, so we have to check that nobody - else tried to pin (or, for that matter, unpin) the - reference in *this* domain. If they did, just give up - and try again. */ - if ( act->pin != old_pin ) - { - __fixup_status_for_copy_pin(act, status); - rcu_unlock_domain(td); - active_entry_release(act); - grant_read_unlock(rgt); - put_page(*page); - return __acquire_grant_for_copy(rd, gref, ldom, readonly, - frame, page, page_off, length, - allow_transitive); - } - - /* The actual remote remote grant may or may not be a - sub-page, but we always treat it as one because that - blocks mappings of transitive grants. */ - is_sub_page = 1; - act->gfn = -1ul; - } else if ( !(sha2->hdr.flags & GTF_sub_page) ) { rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd); @@ -2299,9 +2374,20 @@ td = page_get_owner_and_reference(*page); /* * act->pin being non-zero should guarantee the page to have a - * non-zero refcount and hence a valid owner. + * non-zero refcount and hence a valid owner (matching the one on + * record), with one exception: If the owning domain is dying we + * had better not make implications from pin count (map_grant_ref() + * updates pin counts before obtaining page references, for + * example). */ - ASSERT(td); + if ( td != rd || rd->is_dying ) + { + if ( td ) + put_page(*page); + *page = NULL; + rc = GNTST_bad_domain; + goto unlock_out_clear; + } } act->pin += readonly ? 
GNTPIN_hstr_inc : GNTPIN_hstw_inc; @@ -2316,7 +2402,7 @@ unlock_out_clear: if ( !(readonly) && - !(act->pin & GNTPIN_hstw_mask) ) + !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) gnttab_clear_flag(_GTF_writing, status); if ( !act->pin ) @@ -2420,6 +2506,11 @@ unmap_domain_page(buf->virt); buf->virt = NULL; } + if ( buf->have_grant ) + { + __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); + buf->have_grant = 0; + } if ( buf->have_type ) { put_page_type(buf->page); @@ -2430,11 +2521,6 @@ put_page(buf->page); buf->page = NULL; } - if ( buf->have_grant ) - { - __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); - buf->have_grant = 0; - } } static int gnttab_copy_claim_buf(const struct gnttab_copy *op, @@ -2557,7 +2643,7 @@ { gnttab_copy_release_buf(src); rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); - if ( rc < 0 ) + if ( rc ) goto out; } @@ -2567,7 +2653,7 @@ { gnttab_copy_release_buf(dest); rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); - if ( rc < 0 ) + if ( rc ) goto out; } @@ -2576,6 +2662,14 @@ return rc; } +/* + * gnttab_copy(), other than the various other helpers of + * do_grant_table_op(), returns (besides possible error indicators) + * "count - i" rather than "i" to ensure that even if no progress + * was made at all (perhaps due to gnttab_copy_one() returning a + * positive value) a non-zero value is being handed back (zero needs + * to be avoided, as that means "success, all done"). + */ static long gnttab_copy( XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count) { @@ -2589,7 +2683,7 @@ { if ( i && hypercall_preempt_check() ) { - rc = i; + rc = count - i; break; } @@ -2599,13 +2693,20 @@ break; } - op.status = gnttab_copy_one(&op, &dest, &src); - if ( op.status != GNTST_okay ) + rc = gnttab_copy_one(&op, &dest, &src); + if ( rc > 0 ) + { + rc = count - i; + break; + } + if ( rc != GNTST_okay ) { gnttab_copy_release_buf(&src); gnttab_copy_release_buf(&dest); } + op.status = rc; + rc = 0; if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) { rc = -EFAULT; @@ -2676,10 +2777,13 @@ case 2: for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ ) { - if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) == - GTF_permit_access) && - (shared_entry_v2(gt, i).full_page.frame >> 32) ) + switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask ) { + case GTF_permit_access: + if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) ) + break; + /* fall through */ + case GTF_transitive: gdprintk(XENLOG_WARNING, "tried to change grant table version to 1 with non-representable entries\n"); res = -ERANGE; @@ -2721,8 +2825,9 @@ break; } - if ( op.version < 2 && gt->gt_version == 2 ) - gnttab_unpopulate_status_frames(currd, gt); + if ( op.version < 2 && gt->gt_version == 2 && + (res = gnttab_unpopulate_status_frames(currd, gt)) != 0 ) + goto out_unlock; /* Make sure there's no crud left over from the old version. 
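The new block comment above pins down an easy-to-miss convention: on preemption gnttab_copy() now returns the number of operations still to do, "count - i", never 0, and do_grant_table_op() converts that back into completed work before advancing the guest handle. The arithmetic, spelled out:

#include <assert.h>

/* gnttab_copy() preempted after completing i of count ops: it hands
 * back the remainder so that 0 keeps meaning "success, all done".
 * The dispatcher recovers the completed count for its guest handle. */
int main(void)
{
    unsigned int count = 8, i = 3;

    unsigned int rc = count - i;      /* returned by gnttab_copy()   */
    unsigned int done = count - rc;   /* dispatcher: rc = count - rc */

    assert(rc != 0);                  /* guaranteed while i < count  */
    assert(done == i);                /* guest handle advances by 3  */
    return 0;
}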
*/ for ( i = 0; i < nr_grant_frames(gt); i++ ) @@ -2946,30 +3051,29 @@ return 0; } -static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush, +static int __gnttab_cache_flush(const gnttab_cache_flush_t *cflush, unsigned int *ref_count) { struct domain *d, *owner; struct page_info *page; unsigned long mfn; + struct active_grant_entry *act = NULL; void *v; int ret; if ( (cflush->offset >= PAGE_SIZE) || (cflush->length > PAGE_SIZE) || - (cflush->offset + cflush->length > PAGE_SIZE) ) + (cflush->offset + cflush->length > PAGE_SIZE) || + (cflush->op & ~(GNTTAB_CACHE_INVAL | GNTTAB_CACHE_CLEAN)) ) return -EINVAL; if ( cflush->length == 0 || cflush->op == 0 ) - return 0; + return !*ref_count ? 0 : -EILSEQ; /* currently unimplemented */ if ( cflush->op & GNTTAB_CACHE_SOURCE_GREF ) return -EOPNOTSUPP; - if ( cflush->op & ~(GNTTAB_CACHE_INVAL|GNTTAB_CACHE_CLEAN) ) - return -EINVAL; - d = rcu_lock_current_domain(); mfn = cflush->a.dev_bus_addr >> PAGE_SHIFT; @@ -2981,7 +3085,7 @@ page = mfn_to_page(mfn); owner = page_get_owner_and_reference(page); - if ( !owner ) + if ( !owner || !owner->grant_table ) { rcu_unlock_domain(d); return -EPERM; @@ -2991,13 +3095,13 @@ { grant_read_lock(owner->grant_table); - ret = grant_map_exists(d, owner->grant_table, mfn, ref_count); - if ( ret != 0 ) + act = grant_map_exists(d, owner->grant_table, mfn, ref_count); + if ( IS_ERR_OR_NULL(act) ) { grant_read_unlock(owner->grant_table); rcu_unlock_domain(d); put_page(page); - return ret; + return act ? PTR_ERR(act) : 1; } } @@ -3014,9 +3118,14 @@ ret = 0; if ( d != owner ) + { + active_entry_release(act); grant_read_unlock(owner->grant_table); + } + unmap_domain_page(v); put_page(page); + rcu_unlock_domain(d); return ret; } @@ -3049,6 +3158,9 @@ *ref_count = 0; guest_handle_add_offset(uop, 1); } + + *ref_count = 0; + return 0; } @@ -3143,6 +3255,7 @@ rc = gnttab_copy(copy, count); if ( rc > 0 ) { + rc = count - rc; guest_handle_add_offset(copy, rc); uop = guest_handle_cast(copy, void); } @@ -3473,6 +3586,7 @@ void grant_table_init_vcpu(struct vcpu *v) { + spin_lock_init(&v->maptrack_freelist_lock); v->maptrack_head = MAPTRACK_TAIL; v->maptrack_tail = MAPTRACK_TAIL; } diff -Nru xen-4.9.0/xen/common/kernel.c xen-4.9.2/xen/common/kernel.c --- xen-4.9.0/xen/common/kernel.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/kernel.c 2018-03-28 13:10:55.000000000 +0000 @@ -184,6 +184,42 @@ return -1; } +int parse_boolean(const char *name, const char *s, const char *e) +{ + size_t slen, nlen; + int val = !!strncmp(s, "no-", 3); + + if ( !val ) + s += 3; + + slen = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); + nlen = strlen(name); + + /* Does s now start with name? */ + if ( slen < nlen || strncmp(s, name, nlen) ) + return -1; + + /* Exact, unadorned name? Result depends on the 'no-' prefix. */ + if ( slen == nlen ) + return val; + + /* =$SOMETHING? Defer to the regular boolean parsing. */ + if ( s[nlen] == '=' ) + { + char buf[8]; + + s += nlen + 1; + if ( e <= s || e - s >= ARRAY_SIZE(buf) ) + return -1; + memcpy(buf, s, e - s); + buf[e - s] = 0; + return parse_bool(buf); + } + + /* Unrecognised. Give up. 
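parse_boolean(), added above, recognises three spellings of a boolean sub-option (as consumed, for example, by the iommu=no-igfx handling later in this diff). A self-contained sketch of the accepted forms for NUL-terminated input only, with a stub parse_bool() for the "=value" case (the real one accepts more spellings, and the real parse_boolean() also takes an end pointer):

#include <stdio.h>
#include <string.h>

static int parse_bool(const char *s)   /* simplified stub */
{
    if ( !strcmp(s, "1") || !strcmp(s, "on") || !strcmp(s, "true") )
        return 1;
    if ( !strcmp(s, "0") || !strcmp(s, "off") || !strcmp(s, "false") )
        return 0;
    return -1;
}

/* "name" -> 1, "no-name" -> 0, "name=<bool>" -> parse_bool(),
 * anything else -> -1, mirroring the hunk above. */
static int parse_boolean(const char *name, const char *s)
{
    int val = strncmp(s, "no-", 3) ? 1 : 0;
    size_t nlen = strlen(name);

    if ( !val )
        s += 3;
    if ( strncmp(s, name, nlen) )
        return -1;
    if ( s[nlen] == '\0' )
        return val;
    if ( s[nlen] == '=' )
        return parse_bool(s + nlen + 1);
    return -1;
}

int main(void)
{
    printf("%d %d %d %d\n",
           parse_boolean("igfx", "igfx"),      /* 1  */
           parse_boolean("igfx", "no-igfx"),   /* 0  */
           parse_boolean("igfx", "igfx=off"),  /* 0  */
           parse_boolean("igfx", "iommu"));    /* -1 */
    return 0;
}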
*/ + return -1; +} + unsigned int tainted; /** diff -Nru xen-4.9.0/xen/common/livepatch.c xen-4.9.2/xen/common/livepatch.c --- xen-4.9.0/xen/common/livepatch.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/livepatch.c 2018-03-28 13:10:55.000000000 +0000 @@ -771,8 +771,8 @@ } } - symtab = xmalloc_array(struct livepatch_symbol, nsyms); - strtab = xmalloc_array(char, strtab_len); + symtab = xzalloc_array(struct livepatch_symbol, nsyms); + strtab = xzalloc_array(char, strtab_len); if ( !strtab || !symtab ) { diff -Nru xen-4.9.0/xen/common/livepatch_elf.c xen-4.9.2/xen/common/livepatch_elf.c --- xen-4.9.0/xen/common/livepatch_elf.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/livepatch_elf.c 2018-03-28 13:10:55.000000000 +0000 @@ -52,7 +52,7 @@ int rc; /* livepatch_elf_load sanity checked e_shnum. */ - sec = xmalloc_array(struct livepatch_elf_sec, elf->hdr->e_shnum); + sec = xzalloc_array(struct livepatch_elf_sec, elf->hdr->e_shnum); if ( !sec ) { dprintk(XENLOG_ERR, LIVEPATCH"%s: Could not allocate memory for section table!\n", @@ -225,7 +225,7 @@ /* No need to check values as elf_resolve_sections did it. */ nsym = symtab_sec->sec->sh_size / symtab_sec->sec->sh_entsize; - sym = xmalloc_array(struct livepatch_elf_sym, nsym); + sym = xzalloc_array(struct livepatch_elf_sym, nsym); if ( !sym ) { dprintk(XENLOG_ERR, LIVEPATCH "%s: Could not allocate memory for symbols\n", diff -Nru xen-4.9.0/xen/common/memory.c xen-4.9.2/xen/common/memory.c --- xen-4.9.0/xen/common/memory.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/memory.c 2018-03-28 13:10:55.000000000 +0000 @@ -341,9 +341,6 @@ rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); - if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) - put_page_and_type(page); - /* * With the lack of an IOMMU on some platforms, domains with DMA-capable * device must retrieve the same pfn when the hypercall populate_physmap @@ -411,6 +408,31 @@ a->nr_done = i; } +static bool propagate_node(unsigned int xmf, unsigned int *memflags) +{ + const struct domain *currd = current->domain; + + BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE); + BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE); + + if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE ) + return true; + + if ( is_hardware_domain(currd) || is_control_domain(currd) ) + { + if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES ) + return false; + + *memflags |= MEMF_node(XENMEMF_get_node(xmf)); + if ( xmf & XENMEMF_exact_node_request ) + *memflags |= MEMF_exact_node; + } + else if ( xmf & XENMEMF_exact_node_request ) + return false; + + return true; +} + static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg) { struct xen_memory_exchange exch; @@ -483,6 +505,12 @@ } } + if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) ) + { + rc = -EINVAL; + goto fail_early; + } + d = rcu_lock_domain_by_any_id(exch.in.domid); if ( d == NULL ) { @@ -501,7 +529,6 @@ d, XENMEMF_get_address_bits(exch.out.mem_flags) ? 
: (BITS_PER_LONG+PAGE_SHIFT))); - memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags)); for ( i = (exch.nr_exchanged >> in_chunk_order); i < (exch.in.nr_extents >> in_chunk_order); @@ -774,6 +801,11 @@ guest_handle_add_offset(xatpb->errs, start); xatpb->size -= start; + if ( !guest_handle_okay(xatpb->idxs, xatpb->size) || + !guest_handle_okay(xatpb->gpfns, xatpb->size) || + !guest_handle_okay(xatpb->errs, xatpb->size) ) + return -EFAULT; + while ( xatpb->size > done ) { xen_ulong_t idx; @@ -864,12 +896,8 @@ } read_unlock(&d->vnuma_rwlock); } - else - { - a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags)); - if ( r->mem_flags & XENMEMF_exact_node_request ) - a->memflags |= MEMF_exact_node; - } + else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) ) + return -EINVAL; return 0; } @@ -1096,10 +1124,7 @@ if ( start_extent != (typeof(xatpb.size))start_extent ) return -EDOM; - if ( copy_from_guest(&xatpb, arg, 1) || - !guest_handle_okay(xatpb.idxs, xatpb.size) || - !guest_handle_okay(xatpb.gpfns, xatpb.size) || - !guest_handle_okay(xatpb.errs, xatpb.size) ) + if ( copy_from_guest(&xatpb, arg, 1) ) return -EFAULT; /* This mapspace is unsupported for this hypercall. */ diff -Nru xen-4.9.0/xen/common/page_alloc.c xen-4.9.2/xen/common/page_alloc.c --- xen-4.9.0/xen/common/page_alloc.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/page_alloc.c 2018-03-28 13:10:55.000000000 +0000 @@ -186,7 +186,11 @@ * BOOT-TIME ALLOCATOR */ -static unsigned long __initdata first_valid_mfn = ~0UL; +/* + * first_valid_mfn is exported because it is use in ARM specific NUMA + * helpers. See comment in asm-arm/numa.h. + */ +unsigned long first_valid_mfn = ~0UL; static struct bootmem_region { unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ @@ -706,9 +710,13 @@ if ( node >= MAX_NUMNODES ) node = cpu_to_node(smp_processor_id()); } + else if ( unlikely(node >= MAX_NUMNODES) ) + { + ASSERT_UNREACHABLE(); + return NULL; + } first_node = node; - ASSERT(node < MAX_NUMNODES); ASSERT(zone_lo <= zone_hi); ASSERT(zone_hi < NR_ZONES); @@ -956,7 +964,7 @@ /* If a page has no owner it will need no safety TLB flush. */ pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); if ( pg[i].u.free.need_tlbflush ) - pg[i].tlbflush_timestamp = tlbflush_current_time(); + page_set_tlbflush_timestamp(&pg[i]); /* This page is not a guest frame any more. */ page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */ @@ -1269,6 +1277,16 @@ { unsigned long i; + /* + * Some pages may not go through the boot allocator (e.g reserved + * memory at boot but released just after --- kernel, initramfs, + * etc.). + * Update first_valid_mfn to ensure those regions are covered. 
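init_heap_pages() above lowers the now-exported first_valid_mfn watermark under heap_lock, since boot-reserved regions (kernel, initramfs) can reach the heap allocator late and out of order. The update pattern, reduced to its essence with a pthread mutex standing in for the spinlock:

#include <pthread.h>

static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long first_valid_mfn = ~0UL;   /* "no page seen yet" */

/* Pages may be handed to the allocator in any order and after boot,
 * so the global minimum is maintained with a locked min(). */
static void note_heap_pages(unsigned long start_mfn)
{
    pthread_mutex_lock(&heap_lock);
    if ( start_mfn < first_valid_mfn )
        first_valid_mfn = start_mfn;
    pthread_mutex_unlock(&heap_lock);
}

/* e.g. from init_heap_pages(): note_heap_pages(page_to_mfn(pg)); */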
+ */ + spin_lock(&heap_lock); + first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn); + spin_unlock(&heap_lock); + for ( i = 0; i < nr_pages; i++ ) { unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); diff -Nru xen-4.9.0/xen/common/stop_machine.c xen-4.9.2/xen/common/stop_machine.c --- xen-4.9.0/xen/common/stop_machine.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/stop_machine.c 2018-03-28 13:10:55.000000000 +0000 @@ -94,6 +94,7 @@ stopmachine_data.fn_data = data; stopmachine_data.nr_cpus = nr_cpus; stopmachine_data.fn_cpu = cpu; + stopmachine_data.fn_result = 0; atomic_set(&stopmachine_data.done, 0); stopmachine_data.state = STOPMACHINE_START; @@ -112,7 +113,11 @@ stopmachine_set_state(STOPMACHINE_INVOKE); if ( (cpu == smp_processor_id()) || (cpu == NR_CPUS) ) - stopmachine_data.fn_result = (*fn)(data); + { + ret = (*fn)(data); + if ( ret ) + write_atomic(&stopmachine_data.fn_result, ret); + } stopmachine_wait_state(); ret = stopmachine_data.fn_result; @@ -150,8 +155,12 @@ case STOPMACHINE_INVOKE: if ( (stopmachine_data.fn_cpu == smp_processor_id()) || (stopmachine_data.fn_cpu == NR_CPUS) ) - stopmachine_data.fn_result = - stopmachine_data.fn(stopmachine_data.fn_data); + { + int ret = stopmachine_data.fn(stopmachine_data.fn_data); + + if ( ret ) + write_atomic(&stopmachine_data.fn_result, ret); + } break; default: break; diff -Nru xen-4.9.0/xen/common/wait.c xen-4.9.2/xen/common/wait.c --- xen-4.9.0/xen/common/wait.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/common/wait.c 2018-03-28 13:10:55.000000000 +0000 @@ -127,7 +127,6 @@ unsigned long dummy; u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector; - cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial; ASSERT(wqv->esp == 0); /* Save current VCPU affinity; force wakeup on *this* CPU only. */ @@ -139,14 +138,26 @@ domain_crash_synchronous(); } + /* Hand-rolled setjmp(). */ asm volatile ( - "push %%rax; push %%rbx; push %%rdx; " - "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; " - "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; " - "1: addq $2f-1b,(%%rsp); sub %%esp,%%ecx; cmp %3,%%ecx; ja 3f; " - "mov %%rsp,%%rsi; 2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; " - "pop %%r15; pop %%r14; pop %%r13; pop %%r12; " - "pop %%r11; pop %%r10; pop %%r9; pop %%r8; " + "push %%rax; push %%rbx; push %%rdx; push %%rbp;" + "push %%r8; push %%r9; push %%r10; push %%r11;" + "push %%r12; push %%r13; push %%r14; push %%r15;" + + "call 1f;" + "1: addq $2f-1b,(%%rsp);" + "sub %%esp,%%ecx;" + "cmp %3,%%ecx;" + "ja 3f;" + "mov %%rsp,%%rsi;" + + /* check_wakeup_from_wait() longjmp()'s to this point. */ + "2: rep movsb;" + "mov %%rsp,%%rsi;" + "3: pop %%rax;" + + "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" + "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax" : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy) : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack) @@ -190,11 +201,18 @@ wait(); /* takes us back into the scheduler */ } + /* + * Hand-rolled longjmp(). Returns to the pointer on the top of + * wqv->stack, and lands on a `rep movs` instruction. All other GPRs are + * restored from the stack, so are available for use here. 
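The reshaped wait.c asm above is a hand-rolled setjmp/longjmp pair that additionally relocates stack contents, something standard setjmp cannot do, since the woken vCPU may resume on a different stack. As a rough portable analogue of the control transfer only:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf sleep_ctx;

static void check_wakeup_from_wait_sketch(void)
{
    /* The real code also memcpy()s the saved stack back into place
     * before jumping; plain longjmp() only restores registers. */
    longjmp(sleep_ctx, 1);
}

static void wait_sketch(void)
{
    if ( setjmp(sleep_ctx) == 0 )
    {
        puts("context saved, descheduling");
        check_wakeup_from_wait_sketch();  /* pretend we were woken */
    }
    else
        puts("resumed after wakeup");
}

int main(void)
{
    wait_sketch();
    return 0;
}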
+ */ asm volatile ( - "mov %1,%%"__OP"sp; jmp *(%0)" + "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]" : : "S" (wqv->stack), "D" (wqv->esp), - "c" ((char *)get_cpu_info() - (char *)wqv->esp) + "c" ((char *)get_cpu_info() - (char *)wqv->esp), + [ip] "r" (*(unsigned long *)wqv->stack) : "memory" ); + unreachable(); } #else /* !CONFIG_X86 */ diff -Nru xen-4.9.0/xen/drivers/cpufreq/cpufreq_ondemand.c xen-4.9.2/xen/drivers/cpufreq/cpufreq_ondemand.c --- xen-4.9.0/xen/drivers/cpufreq/cpufreq_ondemand.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/drivers/cpufreq/cpufreq_ondemand.c 2018-03-28 13:10:55.000000000 +0000 @@ -204,7 +204,14 @@ static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { dbs_info->enable = 0; - dbs_info->stoppable = 0; + + /* + * The timer function may be running (from cpufreq_dbs_timer_resume) - + * wait for it to complete. + */ + while ( cmpxchg(&dbs_info->stoppable, 1, 0) < 0 ) + cpu_relax(); + kill_timer(&per_cpu(dbs_timer, dbs_info->cpu)); } @@ -273,6 +280,10 @@ break; case CPUFREQ_GOV_STOP: + if ( !this_dbs_info->enable ) + /* Already not enabled */ + break; + dbs_timer_exit(this_dbs_info); dbs_enable--; @@ -365,23 +376,22 @@ void cpufreq_dbs_timer_resume(void) { - int cpu; - struct timer* t; - s_time_t now; + unsigned int cpu = smp_processor_id(); + int8_t *stoppable = &per_cpu(cpu_dbs_info, cpu).stoppable; - cpu = smp_processor_id(); - - if ( per_cpu(cpu_dbs_info,cpu).stoppable ) + if ( *stoppable ) { - now = NOW(); - t = &per_cpu(dbs_timer, cpu); - if (t->expires <= now) + s_time_t now = NOW(); + struct timer *t = &per_cpu(dbs_timer, cpu); + + if ( t->expires <= now ) { + if ( !cmpxchg(stoppable, 1, -1) ) + return; t->function(t->data); + (void)cmpxchg(stoppable, -1, 1); } else - { - set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate)); - } + set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate)); } } diff -Nru xen-4.9.0/xen/drivers/passthrough/pci.c xen-4.9.2/xen/drivers/passthrough/pci.c --- xen-4.9.0/xen/drivers/passthrough/pci.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/drivers/passthrough/pci.c 2018-03-28 13:10:55.000000000 +0000 @@ -595,21 +595,24 @@ unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); const char *pdev_type; int ret; + bool pf_is_extfn = false; - if (!info) + if ( !info ) pdev_type = "device"; - else if (info->is_extfn) - pdev_type = "extended function"; - else if (info->is_virtfn) + else if ( info->is_virtfn ) { pcidevs_lock(); pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn); + if ( pdev ) + pf_is_extfn = pdev->info.is_extfn; pcidevs_unlock(); if ( !pdev ) pci_add_device(seg, info->physfn.bus, info->physfn.devfn, NULL, node); pdev_type = "virtual function"; } + else if ( info->is_extfn ) + pdev_type = "extended function"; else { info = NULL; @@ -633,7 +636,15 @@ pdev->node = node; if ( info ) + { pdev->info = *info; + /* + * VF's 'is_extfn' field is used to indicate whether its PF is an + * extended function. 
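The pci.c hunk above makes a VF's is_extfn field mirror its PF, and the dmar.c hunk below consumes that when picking the (bus, devfn) the IOMMU should associate with DMA from the device. A hedged sketch of that source-id selection as the two hunks read together:

#include <stdbool.h>
#include <stdint.h>

struct devinfo {
    bool is_virtfn, is_extfn;  /* for a VF, is_extfn mirrors its PF */
    uint8_t bus, devfn, pf_bus, pf_devfn;
};

/* Requests from a VF carry its PF's IDs, and an extended-function PF
 * issues them as devfn 0 on its own bus. */
static void dma_source_id(const struct devinfo *d,
                          uint8_t *bus, uint8_t *devfn)
{
    if ( d->is_virtfn )
    {
        *bus = d->pf_bus;
        *devfn = d->is_extfn ? 0 : d->pf_devfn;
    }
    else if ( d->is_extfn )
    {
        *bus = d->bus;
        *devfn = 0;
    }
    else
    {
        *bus = d->bus;
        *devfn = d->devfn;
    }
}

int main(void)
{
    const struct devinfo vf = { true, true, 6, 0x21, 5, 0x08 };
    uint8_t bus, devfn;

    dma_source_id(&vf, &bus, &devfn);   /* -> bus 5, devfn 0 */
    return (bus == 5 && devfn == 0) ? 0 : 1;
}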
+ */ + if ( pdev->info.is_virtfn ) + pdev->info.is_extfn = pf_is_extfn; + } else if ( !pdev->vf_rlen[0] ) { unsigned int pos = pci_find_ext_capability(seg, bus, devfn, diff -Nru xen-4.9.0/xen/drivers/passthrough/vtd/dmar.c xen-4.9.2/xen/drivers/passthrough/vtd/dmar.c --- xen-4.9.0/xen/drivers/passthrough/vtd/dmar.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/drivers/passthrough/vtd/dmar.c 2018-03-28 13:10:55.000000000 +0000 @@ -211,15 +211,15 @@ if ( pdev == NULL ) return NULL; - if ( pdev->info.is_extfn ) + if ( pdev->info.is_virtfn ) { - bus = pdev->bus; - devfn = 0; + bus = pdev->info.physfn.bus; + devfn = !pdev->info.is_extfn ? pdev->info.physfn.devfn : 0; } - else if ( pdev->info.is_virtfn ) + else if ( pdev->info.is_extfn ) { - bus = pdev->info.physfn.bus; - devfn = PCI_SLOT(pdev->info.physfn.devfn) ? 0 : pdev->info.physfn.devfn; + bus = pdev->bus; + devfn = 0; } else { diff -Nru xen-4.9.0/xen/drivers/passthrough/vtd/iommu.c xen-4.9.2/xen/drivers/passthrough/vtd/iommu.c --- xen-4.9.0/xen/drivers/passthrough/vtd/iommu.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/drivers/passthrough/vtd/iommu.c 2018-03-28 13:10:55.000000000 +0000 @@ -747,14 +747,24 @@ unsigned long flags; struct iommu *iommu = drhd->iommu; - if ( is_igd_drhd(drhd) && !is_igd_vt_enabled_quirk() ) + if ( is_igd_drhd(drhd) ) { - if ( force_iommu ) - panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose"); + if ( !iommu_igfx ) + { + printk(XENLOG_INFO VTDPREFIX + "Passed iommu=no-igfx option. Disabling IGD VT-d engine.\n"); + return; + } - printk(XENLOG_WARNING VTDPREFIX - "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n"); - return; + if ( !is_igd_vt_enabled_quirk() ) + { + if ( force_iommu ) + panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose"); + + printk(XENLOG_WARNING VTDPREFIX + "BIOS did not enable IGD for VT properly. 
Disabling IGD VT-d engine.\n"); + return; + } } /* apply platform specific errata workarounds */ diff -Nru xen-4.9.0/xen/drivers/passthrough/vtd/quirks.c xen-4.9.2/xen/drivers/passthrough/vtd/quirks.c --- xen-4.9.0/xen/drivers/passthrough/vtd/quirks.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/drivers/passthrough/vtd/quirks.c 2018-03-28 13:10:55.000000000 +0000 @@ -70,9 +70,6 @@ { u16 ggc; - if ( !iommu_igfx ) - return 0; - if ( !IS_ILK(ioh_id) ) return 1; diff -Nru xen-4.9.0/xen/include/acpi/cpufreq/cpufreq.h xen-4.9.2/xen/include/acpi/cpufreq/cpufreq.h --- xen-4.9.0/xen/include/acpi/cpufreq/cpufreq.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/acpi/cpufreq/cpufreq.h 2018-03-28 13:10:55.000000000 +0000 @@ -227,8 +227,8 @@ struct cpufreq_frequency_table *freq_table; int cpu; unsigned int enable:1; - unsigned int stoppable:1; unsigned int turbo_enabled:1; + int8_t stoppable; }; int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); diff -Nru xen-4.9.0/xen/include/asm-arm/cpuerrata.h xen-4.9.2/xen/include/asm-arm/cpuerrata.h --- xen-4.9.0/xen/include/asm-arm/cpuerrata.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/cpuerrata.h 2018-03-28 13:10:55.000000000 +0000 @@ -5,6 +5,7 @@ #include void check_local_cpu_errata(void); +void enable_errata_workarounds(void); #ifdef CONFIG_HAS_ALTERNATIVE diff -Nru xen-4.9.0/xen/include/asm-arm/cpufeature.h xen-4.9.2/xen/include/asm-arm/cpufeature.h --- xen-4.9.0/xen/include/asm-arm/cpufeature.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/cpufeature.h 2018-03-28 13:10:55.000000000 +0000 @@ -42,8 +42,9 @@ #define LIVEPATCH_FEATURE 4 #define SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT 5 #define SKIP_CTXT_SWITCH_SERROR_SYNC 6 +#define ARM_HARDEN_BRANCH_PREDICTOR 7 -#define ARM_NCAPS 7 +#define ARM_NCAPS 8 #ifndef __ASSEMBLY__ @@ -74,6 +75,7 @@ const char *desc; u16 capability; bool_t (*matches)(const struct arm_cpu_capabilities *); + int (*enable)(void *); /* Called on every active CPUs */ union { struct { /* To be used for eratum handling only */ u32 midr_model; @@ -85,6 +87,8 @@ void update_cpu_capabilities(const struct arm_cpu_capabilities *caps, const char *info); +void enable_cpu_capabilities(const struct arm_cpu_capabilities *caps); + #endif /* __ASSEMBLY__ */ #endif diff -Nru xen-4.9.0/xen/include/asm-arm/domain.h xen-4.9.2/xen/include/asm-arm/domain.h --- xen-4.9.0/xen/include/asm-arm/domain.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/domain.h 2018-03-28 13:10:55.000000000 +0000 @@ -50,7 +50,8 @@ struct p2m_domain p2m; struct hvm_domain hvm_domain; - gfn_t *grant_table_gfn; + gfn_t *grant_shared_gfn; + gfn_t *grant_status_gfn; struct vmmio vmmio; diff -Nru xen-4.9.0/xen/include/asm-arm/flushtlb.h xen-4.9.2/xen/include/asm-arm/flushtlb.h --- xen-4.9.0/xen/include/asm-arm/flushtlb.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/flushtlb.h 2018-03-28 13:10:55.000000000 +0000 @@ -12,6 +12,11 @@ #define tlbflush_current_time() (0) +static inline void page_set_tlbflush_timestamp(struct page_info *page) +{ + page->tlbflush_timestamp = tlbflush_current_time(); +} + #if defined(CONFIG_ARM_32) # include #elif defined(CONFIG_ARM_64) diff -Nru xen-4.9.0/xen/include/asm-arm/grant_table.h xen-4.9.2/xen/include/asm-arm/grant_table.h --- xen-4.9.0/xen/include/asm-arm/grant_table.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/grant_table.h 2018-03-28 13:10:55.000000000 +0000 @@ -14,13 +14,23 @@ unsigned 
long new_gpaddr, unsigned int flags); void gnttab_mark_dirty(struct domain *d, unsigned long l); #define gnttab_create_status_page(d, t, i) do {} while (0) -#define gnttab_status_gmfn(d, t, i) (0) #define gnttab_release_host_mappings(domain) 1 static inline int replace_grant_supported(void) { return 1; } +#define gnttab_set_frame_gfn(d, st, idx, gfn) \ + do { \ + ((st) ? (d)->arch.grant_status_gfn \ + : (d)->arch.grant_shared_gfn)[idx] = (gfn); \ + } while ( 0 ) + +#define gnttab_get_frame_gfn(d, st, idx) ({ \ + _gfn((st) ? gnttab_status_gmfn(d, (d)->grant_table, idx) \ + : gnttab_shared_gmfn(d, (d)->grant_table, idx)); \ +}) + #define gnttab_create_shared_page(d, t, i) \ do { \ share_xen_page_with_guest( \ @@ -29,8 +39,12 @@ } while ( 0 ) #define gnttab_shared_gmfn(d, t, i) \ - ( ((i >= nr_grant_frames(d->grant_table)) && \ - (i < max_grant_frames)) ? 0 : gfn_x(d->arch.grant_table_gfn[i])) + gfn_x(((i) >= nr_grant_frames(t)) ? INVALID_GFN \ + : (d)->arch.grant_shared_gfn[i]) + +#define gnttab_status_gmfn(d, t, i) \ + gfn_x(((i) >= nr_status_frames(t)) ? INVALID_GFN \ + : (d)->arch.grant_status_gfn[i]) #define gnttab_need_iommu_mapping(d) \ (is_domain_direct_mapped(d) && need_iommu(d)) diff -Nru xen-4.9.0/xen/include/asm-arm/numa.h xen-4.9.2/xen/include/asm-arm/numa.h --- xen-4.9.0/xen/include/asm-arm/numa.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/numa.h 2018-03-28 13:10:55.000000000 +0000 @@ -12,9 +12,15 @@ return 0; } +/* + * TODO: make first_valid_mfn static when NUMA is supported on Arm, this + * is required because the dummy helpers are using it. + */ +extern unsigned long first_valid_mfn; + /* XXX: implement NUMA support */ -#define node_spanned_pages(nid) (total_pages) -#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) +#define node_spanned_pages(nid) (max_page - first_valid_mfn) +#define node_start_pfn(nid) (first_valid_mfn) #define __node_distance(a, b) (20) static inline unsigned int arch_get_dma_bitsize(void) diff -Nru xen-4.9.0/xen/include/asm-arm/processor.h xen-4.9.2/xen/include/asm-arm/processor.h --- xen-4.9.0/xen/include/asm-arm/processor.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-arm/processor.h 2018-03-28 13:10:55.000000000 +0000 @@ -46,13 +46,23 @@ #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_PART_CORTEX_A12 0xC0D +#define ARM_CPU_PART_CORTEX_A17 0xC0E #define ARM_CPU_PART_CORTEX_A15 0xC0F #define ARM_CPU_PART_CORTEX_A53 0xD03 #define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A72 0xD08 +#define ARM_CPU_PART_CORTEX_A73 0xD09 +#define ARM_CPU_PART_CORTEX_A75 0xD0A +#define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) +#define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) #define MIDR_CORTEX_A15 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A15) #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) /* MPIDR Multiprocessor Affinity Register */ #define _MPIDR_UP (30) @@ -320,8 +330,9 @@ unsigned long fp:4; /* Floating Point */ unsigned long simd:4; /* Advanced SIMD */ unsigned long gic:4; /* GIC support */ - unsigned long __res0:4; - unsigned long __res1; + unsigned long __res0:28; + 
unsigned long csv2:4; + unsigned long __res1:4; }; } pfr64; diff -Nru xen-4.9.0/xen/include/asm-x86/alternative-asm.h xen-4.9.2/xen/include/asm-x86/alternative-asm.h --- xen-4.9.0/xen/include/asm-x86/alternative-asm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/alternative-asm.h 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,77 @@ +#ifndef _ASM_X86_ALTERNATIVE_ASM_H_ +#define _ASM_X86_ALTERNATIVE_ASM_H_ + +#ifdef __ASSEMBLY__ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len +.endm + +.macro ALTERNATIVE oldinstr, newinstr, feature +.Lold_start_\@: + \oldinstr +.Lold_end_\@: + + .pushsection .altinstructions, "a", @progbits + altinstruction_entry .Lold_start_\@, .Lnew_start_\@, \feature, \ + (.Lold_end_\@ - .Lold_start_\@), (.Lnew_end_\@ - .Lnew_start_\@) + + .section .discard, "a", @progbits + /* Assembler-time check that \newinstr isn't longer than \oldinstr. */ + .byte 0xff + (.Lnew_end_\@ - .Lnew_start_\@) - (.Lold_end_\@ - .Lold_start_\@) + + .section .altinstr_replacement, "ax", @progbits +.Lnew_start_\@: + \newinstr +.Lnew_end_\@: + .popsection +.endm + +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +.Lold_start_\@: + \oldinstr +.Lold_end_\@: + + .pushsection .altinstructions, "a", @progbits + altinstruction_entry .Lold_start_\@, .Lnew1_start_\@, \feature1, \ + (.Lold_end_\@ - .Lold_start_\@), (.Lnew1_end_\@ - .Lnew1_start_\@) + altinstruction_entry .Lold_start_\@, .Lnew2_start_\@, \feature2, \ + (.Lold_end_\@ - .Lold_start_\@), (.Lnew2_end_\@ - .Lnew2_start_\@) + + .section .discard, "a", @progbits + /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. */ + .byte 0xff + (.Lnew1_end_\@ - .Lnew1_start_\@) - (.Lold_end_\@ - .Lold_start_\@) + .byte 0xff + (.Lnew2_end_\@ - .Lnew2_start_\@) - (.Lold_end_\@ - .Lold_start_\@) + + .section .altinstr_replacement, "ax", @progbits +.Lnew1_start_\@: + \newinstr1 +.Lnew1_end_\@: +.Lnew2_start_\@: + \newinstr2 +.Lnew2_end_\@: + .popsection +.endm + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/xen/include/asm-x86/alternative.h xen-4.9.2/xen/include/asm-x86/alternative.h --- xen-4.9.0/xen/include/asm-x86/alternative.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/alternative.h 2018-03-28 13:10:55.000000000 +0000 @@ -1,17 +1,10 @@ #ifndef __X86_ALTERNATIVE_H__ #define __X86_ALTERNATIVE_H__ +#include #include -#ifdef __ASSEMBLY__ -.macro altinstruction_entry orig alt feature orig_len alt_len - .long \orig - . - .long \alt - . - .word \feature - .byte \orig_len - .byte \alt_len -.endm -#else +#ifndef __ASSEMBLY__ #include #include @@ -145,6 +138,6 @@ /* Use this macro(s) if you need more than one output parameter. */ #define ASM_OUTPUT2(a...) 
a -#endif /* __ASSEMBLY__ */ +#endif /* !__ASSEMBLY__ */ #endif /* __X86_ALTERNATIVE_H__ */ diff -Nru xen-4.9.0/xen/include/asm-x86/asm_defns.h xen-4.9.2/xen/include/asm-x86/asm_defns.h --- xen-4.9.0/xen/include/asm-x86/asm_defns.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/asm_defns.h 2018-03-28 13:10:55.000000000 +0000 @@ -7,23 +7,23 @@ #include #endif #include +#include #include #include #include #include #include -#ifndef __ASSEMBLY__ -void ret_from_intr(void); +#ifdef __ASSEMBLY__ +# include +#else +asm ( "\t.equ CONFIG_INDIRECT_THUNK, " + __stringify(IS_ENABLED(CONFIG_INDIRECT_THUNK)) ); +asm ( "\t.include \"asm/indirect_thunk_asm.h\"" ); #endif -#ifdef CONFIG_FRAME_POINTER -/* Indicate special exception stack frame by inverting the frame pointer. */ -#define SETUP_EXCEPTION_FRAME_POINTER(offs) \ - leaq offs(%rsp),%rbp; \ - notq %rbp -#else -#define SETUP_EXCEPTION_FRAME_POINTER(offs) +#ifndef __ASSEMBLY__ +void ret_from_intr(void); #endif #ifndef NDEBUG @@ -42,31 +42,6 @@ #define ASSERT_INTERRUPTS_DISABLED \ ASSERT_INTERRUPT_STATUS(z, "INTERRUPTS DISABLED") -/* - * This flag is set in an exception frame when registers R12-R15 did not get - * saved. - */ -#define _TRAP_regs_partial 16 -#define TRAP_regs_partial (1 << _TRAP_regs_partial) -/* - * This flag gets set in an exception frame when registers R12-R15 possibly - * get modified from their originally saved values and hence need to be - * restored even if the normal call flow would restore register values. - * - * The flag being set implies _TRAP_regs_partial to be unset. Restoring - * R12-R15 thus is - * - required when this flag is set, - * - safe when _TRAP_regs_partial is unset. - */ -#define _TRAP_regs_dirty 17 -#define TRAP_regs_dirty (1 << _TRAP_regs_dirty) - -#define mark_regs_dirty(r) ({ \ - struct cpu_user_regs *r__ = (r); \ - ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \ - r__->entry_vector |= TRAP_regs_dirty; \ -}) - #ifdef __ASSEMBLY__ # define _ASM_EX(p) p-. #else @@ -127,9 +102,30 @@ UNLIKELY_DONE(mp, tag); \ __UNLIKELY_END(tag) + .equ .Lrax, 0 + .equ .Lrcx, 1 + .equ .Lrdx, 2 + .equ .Lrbx, 3 + .equ .Lrsp, 4 + .equ .Lrbp, 5 + .equ .Lrsi, 6 + .equ .Lrdi, 7 + .equ .Lr8, 8 + .equ .Lr9, 9 + .equ .Lr10, 10 + .equ .Lr11, 11 + .equ .Lr12, 12 + .equ .Lr13, 13 + .equ .Lr14, 14 + .equ .Lr15, 15 + #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) #define GET_STACK_END(reg) \ + .if .Lr##reg >= 8; \ + movq $STACK_SIZE-1, %r##reg; \ + .else; \ movl $STACK_SIZE-1, %e##reg; \ + .endif; \ orq %rsp, %r##reg #define GET_CPUINFO_FIELD(field, reg) \ @@ -236,7 +232,7 @@ #endif #ifdef __ASSEMBLY__ -.macro SAVE_ALL op +.macro SAVE_ALL op, compat=0 .ifeqs "\op", "CLAC" ASM_CLAC .else @@ -251,66 +247,45 @@ addq $-(UREGS_error_code-UREGS_r15), %rsp cld movq %rdi,UREGS_rdi(%rsp) + xor %edi, %edi movq %rsi,UREGS_rsi(%rsp) + xor %esi, %esi movq %rdx,UREGS_rdx(%rsp) + xor %edx, %edx movq %rcx,UREGS_rcx(%rsp) + xor %ecx, %ecx movq %rax,UREGS_rax(%rsp) - movq %r8,UREGS_r8(%rsp) - movq %r9,UREGS_r9(%rsp) - movq %r10,UREGS_r10(%rsp) - movq %r11,UREGS_r11(%rsp) - movq %rbx,UREGS_rbx(%rsp) - movq %rbp,UREGS_rbp(%rsp) - SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) - movq %r12,UREGS_r12(%rsp) - movq %r13,UREGS_r13(%rsp) - movq %r14,UREGS_r14(%rsp) - movq %r15,UREGS_r15(%rsp) -.endm - -/* - * Save all registers not preserved by C code or used in entry/exit code. Mark - * the frame as partial. 
- * - * @type: exception type - * @compat: R8-R15 don't need saving, and the frame nevertheless is complete - */ -.macro SAVE_VOLATILE type compat=0 -.if \compat - movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp) -.else - movl $\type|TRAP_regs_partial,\ - UREGS_entry_vector-UREGS_error_code(%rsp) -.endif - addq $-(UREGS_error_code-UREGS_r15),%rsp - cld - movq %rdi,UREGS_rdi(%rsp) - movq %rsi,UREGS_rsi(%rsp) - movq %rdx,UREGS_rdx(%rsp) - movq %rcx,UREGS_rcx(%rsp) - movq %rax,UREGS_rax(%rsp) + xor %eax, %eax .if !\compat movq %r8,UREGS_r8(%rsp) movq %r9,UREGS_r9(%rsp) movq %r10,UREGS_r10(%rsp) movq %r11,UREGS_r11(%rsp) .endif + xor %r8d, %r8d + xor %r9d, %r9d + xor %r10d, %r10d + xor %r11d, %r11d movq %rbx,UREGS_rbx(%rsp) + xor %ebx, %ebx movq %rbp,UREGS_rbp(%rsp) - SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) -.endm - -/* - * Complete a frame potentially only partially saved. - */ -.macro SAVE_PRESERVED - btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp) - jnc 987f +#ifdef CONFIG_FRAME_POINTER +/* Indicate special exception stack frame by inverting the frame pointer. */ + leaq UREGS_rbp(%rsp), %rbp + notq %rbp +#else + xor %ebp, %ebp +#endif +.if !\compat movq %r12,UREGS_r12(%rsp) movq %r13,UREGS_r13(%rsp) movq %r14,UREGS_r14(%rsp) movq %r15,UREGS_r15(%rsp) -987: +.endif + xor %r12d, %r12d + xor %r13d, %r13d + xor %r14d, %r14d + xor %r15d, %r15d .endm #define LOAD_ONE_REG(reg, compat) \ @@ -330,52 +305,34 @@ */ .macro RESTORE_ALL adj=0 compat=0 .if !\compat - testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp) + movq UREGS_r15(%rsp), %r15 + movq UREGS_r14(%rsp), %r14 + movq UREGS_r13(%rsp), %r13 + movq UREGS_r12(%rsp), %r12 +.else + xor %r15d, %r15d + xor %r14d, %r14d + xor %r13d, %r13d + xor %r12d, %r12d +.endif + LOAD_ONE_REG(bp, \compat) + LOAD_ONE_REG(bx, \compat) +.if !\compat movq UREGS_r11(%rsp),%r11 movq UREGS_r10(%rsp),%r10 movq UREGS_r9(%rsp),%r9 movq UREGS_r8(%rsp),%r8 .else - xor %r11, %r11 - xor %r10, %r10 - xor %r9, %r9 - xor %r8, %r8 + xor %r11d, %r11d + xor %r10d, %r10d + xor %r9d, %r9d + xor %r8d, %r8d .endif LOAD_ONE_REG(ax, \compat) LOAD_ONE_REG(cx, \compat) LOAD_ONE_REG(dx, \compat) LOAD_ONE_REG(si, \compat) LOAD_ONE_REG(di, \compat) -.if !\compat - jz 987f - movq UREGS_r15(%rsp),%r15 - movq UREGS_r14(%rsp),%r14 - movq UREGS_r13(%rsp),%r13 - movq UREGS_r12(%rsp),%r12 -#ifndef NDEBUG - .subsection 1 -987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp) - jnz 987f - cmpq UREGS_r15(%rsp),%r15 - jne 789f - cmpq UREGS_r14(%rsp),%r14 - jne 789f - cmpq UREGS_r13(%rsp),%r13 - jne 789f - cmpq UREGS_r12(%rsp),%r12 - je 987f -789: BUG /* Corruption of partial register state. 
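The interleaved `xor` instructions in the rewritten SAVE_ALL are the security-relevant part of this hunk: each guest register is scrubbed as soon as it has been saved, so stale guest-controlled values cannot feed later speculative gadgets, and the 32-bit `xor` form is used because writing a 32-bit register zero-extends to all 64 bits with a shorter, dependency-breaking encoding. A standalone illustration of the idiom (not Xen code):

    /* Zero the scratch registers via the 32-bit xor idiom (GCC, x86-64). */
    static inline void scrub_scratch_gprs(void)
    {
        asm volatile ( "xor %%r8d,  %%r8d\n\t"
                       "xor %%r9d,  %%r9d\n\t"
                       "xor %%r10d, %%r10d\n\t"
                       "xor %%r11d, %%r11d"
                       ::: "r8", "r9", "r10", "r11" );
    }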
*/ - .subsection 0 -#endif -.else - xor %r15, %r15 - xor %r14, %r14 - xor %r13, %r13 - xor %r12, %r12 -.endif -987: - LOAD_ONE_REG(bp, \compat) - LOAD_ONE_REG(bx, \compat) subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp .endm @@ -409,4 +366,6 @@ #define REX64_PREFIX "rex64/" #endif +#include <asm/spec_ctrl_asm.h> + #endif /* __X86_ASM_DEFNS_H__ */ diff -Nru xen-4.9.0/xen/include/asm-x86/cpufeature.h xen-4.9.2/xen/include/asm-x86/cpufeature.h --- xen-4.9.0/xen/include/asm-x86/cpufeature.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/cpufeature.h 2018-03-28 13:10:55.000000000 +0000 @@ -104,6 +104,7 @@ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) #define cpu_has_aperfmperf boot_cpu_has(X86_FEATURE_APERFMPERF) +#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH) enum _cache_type { CACHE_TYPE_NULL = 0, diff -Nru xen-4.9.0/xen/include/asm-x86/cpufeatures.h xen-4.9.2/xen/include/asm-x86/cpufeatures.h --- xen-4.9.0/xen/include/asm-x86/cpufeatures.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/cpufeatures.h 2018-03-28 13:10:55.000000000 +0000 @@ -22,3 +22,13 @@ XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */ XEN_CPUFEATURE(XEN_SMEP, (FSCAPINTS+0)*32+10) /* SMEP gets used by Xen itself */ XEN_CPUFEATURE(XEN_SMAP, (FSCAPINTS+0)*32+11) /* SMAP gets used by Xen itself */ +XEN_CPUFEATURE(MSR_PLATFORM_INFO, (FSCAPINTS+0)*32+12) /* PLATFORM_INFO MSR present */ +XEN_CPUFEATURE(MSR_MISC_FEATURES, (FSCAPINTS+0)*32+13) /* MISC_FEATURES_ENABLES MSR present */ +XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+14) /* lfence set as Dispatch Serialising */ +XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+15) /* Use IND_THUNK_LFENCE */ +XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+16) /* Use IND_THUNK_JMP */ +XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+17) /* IBRSB || IBPB */ +XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+18) /* IBRSB && IRBS set in Xen */ +XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+19) /* IBRSB && IBRS clear in Xen */ +XEN_CPUFEATURE(RSB_NATIVE, (FSCAPINTS+0)*32+20) /* RSB overwrite needed for native */ +XEN_CPUFEATURE(RSB_VMEXIT, (FSCAPINTS+0)*32+21) /* RSB overwrite needed for vmexit */ diff -Nru xen-4.9.0/xen/include/asm-x86/current.h xen-4.9.2/xen/include/asm-x86/current.h --- xen-4.9.0/xen/include/asm-x86/current.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/current.h 2018-03-28 13:10:55.000000000 +0000 @@ -41,6 +41,25 @@ struct vcpu *current_vcpu; unsigned long per_cpu_offset; unsigned long cr4; + /* + * Of the two following fields the latter is being set to the CR3 value + * to be used on the given pCPU for loading whenever 64-bit PV guest + * context is being entered. The value never changes once set. + * The former is the value to restore when re-entering Xen, if any. IOW + * its value being zero means there's nothing to restore. However, its + * value can also be negative, indicating to the exit-to-Xen code that + * restoring is not necessary, but allowing any nested entry code paths + * to still know the value to put back into CR3. + */ + unsigned long xen_cr3; + unsigned long pv_cr3; + + /* See asm-x86/spec_ctrl_asm.h for usage.
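The new cpu_info fields above (xen_cr3/pv_cr3 for the XPTI page-table switch, plus the SPEC_CTRL shadow state) are reachable from assembly precisely because of the GET_STACK_END trick earlier in this diff: Xen stacks are STACK_SIZE-aligned, so OR-ing STACK_SIZE-1 into %rsp yields the stack's last byte, and STACK_CPUINFO_FIELD indexes backwards from there. A C sketch under those assumptions (struct trimmed to the fields added here; the stack size constant is illustrative):

    /* Sketch only: how GET_STACK_END locates per-CPU state from %rsp. */
    #define SKETCH_STACK_SIZE (8UL * 4096)   /* illustrative, STACK_SIZE-like */

    struct sketch_cpu_info {
        unsigned long xen_cr3, pv_cr3;       /* XPTI page-table switching */
        unsigned int shadow_spec_ctrl;       /* MSR_SPEC_CTRL shadow      */
        unsigned char use_shadow_spec_ctrl;
        unsigned char bti_ist_info;
    };

    static inline struct sketch_cpu_info *sketch_get_cpu_info(unsigned long rsp)
    {
        /* Stacks are STACK_SIZE aligned: OR in the mask to get the last byte. */
        unsigned long stack_end = rsp | (SKETCH_STACK_SIZE - 1);

        return (struct sketch_cpu_info *)(stack_end + 1
                                          - sizeof(struct sketch_cpu_info));
    }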
*/ + unsigned int shadow_spec_ctrl; + bool use_shadow_spec_ctrl; + uint8_t bti_ist_info; + + unsigned long __pad; /* get_stack_bottom() must be 16-byte aligned */ }; diff -Nru xen-4.9.0/xen/include/asm-x86/domain.h xen-4.9.2/xen/include/asm-x86/domain.h --- xen-4.9.0/xen/include/asm-x86/domain.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/domain.h 2018-03-28 13:10:55.000000000 +0000 @@ -76,6 +76,10 @@ /* x86/64: toggle guest between kernel and user modes. */ void toggle_guest_mode(struct vcpu *); +/* x86/64: toggle guest page tables between kernel and user modes. */ +void toggle_guest_pt(struct vcpu *); + +void cpuid_policy_updated(struct vcpu *v); /* * Initialise a hypercall-transfer page. The given pointer must be mapped @@ -527,6 +531,8 @@ pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ pagetable_t guest_table; /* (MFN) guest notion of cr3 */ struct page_info *old_guest_table; /* partially destructed pagetable */ + struct page_info *old_guest_ptpg; /* containing page table of the */ + /* former, if any */ /* guest_table holds a ref to the page, and also a type-count unless * shadow refcounts are in use */ pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ @@ -566,6 +572,8 @@ struct paging_vcpu paging; + uint32_t spec_ctrl; + uint32_t gdbsx_vcpu_event; /* A secondary copy of the vcpu time info. */ diff -Nru xen-4.9.0/xen/include/asm-x86/flushtlb.h xen-4.9.2/xen/include/asm-x86/flushtlb.h --- xen-4.9.0/xen/include/asm-x86/flushtlb.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/flushtlb.h 2018-03-28 13:10:55.000000000 +0000 @@ -23,6 +23,20 @@ #define tlbflush_current_time() tlbflush_clock +static inline void page_set_tlbflush_timestamp(struct page_info *page) +{ + /* + * Prevent storing a stale time stamp, which could happen if an update + * to tlbflush_clock plus a subsequent flush IPI happen between the + * reading of tlbflush_clock and the writing of the struct page_info + * field. + */ + ASSERT(local_irq_is_enabled()); + local_irq_disable(); + page->tlbflush_timestamp = tlbflush_current_time(); + local_irq_enable(); +} + /* * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. * @lastuse_stamp is a timestamp taken when the PFN we are testing was last diff -Nru xen-4.9.0/xen/include/asm-x86/grant_table.h xen-4.9.2/xen/include/asm-x86/grant_table.h --- xen-4.9.0/xen/include/asm-x86/grant_table.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/grant_table.h 2018-03-28 13:10:55.000000000 +0000 @@ -18,6 +18,14 @@ int replace_grant_host_mapping( uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags); +#define gnttab_set_frame_gfn(d, st, idx, gfn) do {} while ( 0 ) +#define gnttab_get_frame_gfn(d, st, idx) ({ \ + unsigned long mfn_ = (st) ? gnttab_status_mfn((d)->grant_table, idx) \ + : gnttab_shared_mfn((d)->grant_table, idx); \ + unsigned long gpfn_ = get_gpfn_from_mfn(mfn_); \ + VALID_M2P(gpfn_) ? 
_gfn(gpfn_) : INVALID_GFN; \ +}) + #define gnttab_create_shared_page(d, t, i) \ do { \ share_xen_page_with_guest( \ @@ -33,11 +41,11 @@ } while ( 0 ) -#define gnttab_shared_mfn(d, t, i) \ +#define gnttab_shared_mfn(t, i) \ ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT)) #define gnttab_shared_gmfn(d, t, i) \ - (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) + (mfn_to_gmfn(d, gnttab_shared_mfn(t, i))) #define gnttab_status_mfn(t, i) \ diff -Nru xen-4.9.0/xen/include/asm-x86/hvm/hvm.h xen-4.9.2/xen/include/asm-x86/hvm/hvm.h --- xen-4.9.0/xen/include/asm-x86/hvm/hvm.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/hvm/hvm.h 2018-03-28 13:10:55.000000000 +0000 @@ -135,7 +135,7 @@ void (*update_guest_cr)(struct vcpu *v, unsigned int cr); void (*update_guest_efer)(struct vcpu *v); - void (*update_guest_vendor)(struct vcpu *v); + void (*cpuid_policy_changed)(struct vcpu *v); void (*fpu_leave)(struct vcpu *v); @@ -332,9 +332,9 @@ hvm_funcs.update_guest_efer(v); } -static inline void hvm_update_guest_vendor(struct vcpu *v) +static inline void hvm_cpuid_policy_changed(struct vcpu *v) { - hvm_funcs.update_guest_vendor(v); + hvm_funcs.cpuid_policy_changed(v); } /* diff -Nru xen-4.9.0/xen/include/asm-x86/hvm/vmx/vmcs.h xen-4.9.2/xen/include/asm-x86/hvm/vmx/vmcs.h --- xen-4.9.0/xen/include/asm-x86/hvm/vmx/vmcs.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/hvm/vmx/vmcs.h 2018-03-28 13:10:55.000000000 +0000 @@ -136,7 +136,7 @@ /* Are we emulating rather than VMENTERing? */ uint8_t vmx_emulate; - bool lbr_tsx_fixup_enabled; + uint8_t lbr_fixup_enabled; /* Bitmask of segments that we can't safely use in virtual 8086 mode */ uint16_t vm86_segment_mask; diff -Nru xen-4.9.0/xen/include/asm-x86/hvm/vmx/vvmx.h xen-4.9.2/xen/include/asm-x86/hvm/vmx/vvmx.h --- xen-4.9.0/xen/include/asm-x86/hvm/vmx/vvmx.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/hvm/vmx/vvmx.h 2018-03-28 13:10:55.000000000 +0000 @@ -224,8 +224,6 @@ int nvmx_handle_invvpid(struct cpu_user_regs *regs); int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content); -int nvmx_msr_write_intercept(unsigned int msr, - u64 msr_content); void nvmx_update_exec_control(struct vcpu *v, u32 value); void nvmx_update_secondary_exec_control(struct vcpu *v, diff -Nru xen-4.9.0/xen/include/asm-x86/indirect_thunk_asm.h xen-4.9.2/xen/include/asm-x86/indirect_thunk_asm.h --- xen-4.9.0/xen/include/asm-x86/indirect_thunk_asm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/indirect_thunk_asm.h 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,41 @@ +/* + * Warning! This file is included at an assembler level for .c files, causing + * usual #ifdef'ary to turn into comments. + */ + +.macro INDIRECT_BRANCH insn:req arg:req +/* + * Create an indirect branch. insn is one of call/jmp, arg is a single + * register. + * + * With no compiler support, this degrades into a plain indirect call/jmp. + * With compiler support, dispatch to the correct __x86_indirect_thunk_* + */ + .if CONFIG_INDIRECT_THUNK == 1 + + $done = 0 + .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 + .ifeqs "\arg", "%r\reg" + \insn __x86_indirect_thunk_r\reg + $done = 1 + .exitm + .endif + .endr + + .if $done != 1 + .error "Bad register arg \arg" + .endif + + .else + \insn *\arg + .endif +.endm + +/* Convenience wrappers. 
*/ +.macro INDIRECT_CALL arg:req + INDIRECT_BRANCH call \arg +.endm + +.macro INDIRECT_JMP arg:req + INDIRECT_BRANCH jmp \arg +.endm diff -Nru xen-4.9.0/xen/include/asm-x86/mm.h xen-4.9.2/xen/include/asm-x86/mm.h --- xen-4.9.0/xen/include/asm-x86/mm.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/mm.h 2018-03-28 13:10:55.000000000 +0000 @@ -82,7 +82,8 @@ unsigned long type:5; /* What kind of shadow is this? */ unsigned long pinned:1; /* Is the shadow pinned? */ unsigned long head:1; /* Is this the first page of the shadow? */ - unsigned long count:25; /* Reference count */ +#define PAGE_SH_REFCOUNT_WIDTH 25 + unsigned long count:PAGE_SH_REFCOUNT_WIDTH; /* Reference count */ } sh; /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ @@ -124,11 +125,11 @@ u32 tlbflush_timestamp; /* - * When PGT_partial is true then this field is valid and indicates - * that PTEs in the range [0, @nr_validated_ptes) have been validated. - * An extra page reference must be acquired (or not dropped) whenever - * PGT_partial gets set, and it must be dropped when the flag gets - * cleared. This is so that a get() leaving a page in partially + * When PGT_partial is true then the first two fields are valid and + * indicate that PTEs in the range [0, @nr_validated_ptes) have been + * validated. An extra page reference must be acquired (or not dropped) + * whenever PGT_partial gets set, and it must be dropped when the flag + * gets cleared. This is so that a get() leaving a page in partially * validated state (where the caller would drop the reference acquired * due to the getting of the type [apparently] failing [-ERESTART]) * would not accidentally result in a page left with zero general @@ -152,10 +153,18 @@ * put_page_from_lNe() (due to the apparent failure), and hence it * must be dropped when the put operation is resumed (and completes), * but it must not be acquired if picking up the page for validation. + * + * The 3rd field, @linear_pt_count, indicates + * - by a positive value, how many same-level page table entries a page + * table has, + * - by a negative value, in how many same-level page tables a page is + * in use. 
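The signed linear_pt_count described above is the core of the XSA-240 accounting: a page table may either contain same-level (linear) entries (positive count) or itself be in use as a same-level mapping (negative count), never both at once. A hedged sketch of the increment side, modeled on the mm.c half of the fix (simplified: GCC atomics stand in for Xen's cmpxchg helpers, and overflow checking is omitted):

    #include <stdbool.h>
    #include <stdint.h>

    static bool sketch_inc_linear_entries(int16_t *linear_pt_count)
    {
        int16_t oc = __atomic_load_n(linear_pt_count, __ATOMIC_RELAXED), nc;

        do {
            /* Negative means "in use as a linear pt": the uses are exclusive. */
            if ( oc < 0 )
                return false;
            nc = oc + 1;
        } while ( !__atomic_compare_exchange_n(linear_pt_count, &oc, nc, false,
                                               __ATOMIC_SEQ_CST,
                                               __ATOMIC_RELAXED) );

        return true;
    }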
*/ struct { - u16 nr_validated_ptes; - s8 partial_pte; + u16 nr_validated_ptes:PAGETABLE_ORDER + 1; + u16 :16 - PAGETABLE_ORDER - 1 - 2; + s16 partial_pte:2; + s16 linear_pt_count; }; /* @@ -206,6 +215,9 @@ #define PGT_count_width PG_shift(9) #define PGT_count_mask ((1UL<<PGT_count_width)-1) [...] #define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate)) #define paging_mode_external(_d) (!!((_d)->arch.paging.mode & PG_external)) -#define paging_mode_only_log_dirty(_d) \ - (((_d)->arch.paging.mode & PG_MASK) == PG_log_dirty) - /* flags used for paging debug */ #define PAGING_DEBUG_LOGDIRTY 0 diff -Nru xen-4.9.0/xen/include/asm-x86/processor.h xen-4.9.2/xen/include/asm-x86/processor.h --- xen-4.9.0/xen/include/asm-x86/processor.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/processor.h 2018-03-28 13:10:55.000000000 +0000 @@ -168,6 +168,7 @@ extern void identify_cpu(struct cpuinfo_x86 *); extern void setup_clear_cpu_cap(unsigned int); +extern void setup_force_cpu_cap(unsigned int); extern void print_cpu_info(unsigned int cpu); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); @@ -465,6 +466,7 @@ extern idt_entry_t *idt_tables[]; DECLARE_PER_CPU(struct tss_struct, init_tss); +DECLARE_PER_CPU(root_pgentry_t *, root_pgt); extern void init_int80_direct_trap(struct vcpu *v); diff -Nru xen-4.9.0/xen/include/asm-x86/spec_ctrl_asm.h xen-4.9.2/xen/include/asm-x86/spec_ctrl_asm.h --- xen-4.9.0/xen/include/asm-x86/spec_ctrl_asm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/spec_ctrl_asm.h 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,335 @@ +/****************************************************************************** + * include/asm-x86/spec_ctrl.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (c) 2017-2018 Citrix Systems Ltd. + */ + +#ifndef __X86_SPEC_CTRL_ASM_H__ +#define __X86_SPEC_CTRL_ASM_H__ + +/* Encoding of the bottom bits in cpuinfo.bti_ist_info */ +#define BTI_IST_IBRS (1 << 0) +#define BTI_IST_WRMSR (1 << 1) +#define BTI_IST_RSB (1 << 2) + +#ifdef __ASSEMBLY__ +#include <asm/msr-index.h> + +/* + * Saving and restoring MSR_SPEC_CTRL state is a little tricky. + * + * We want the guests choice of SPEC_CTRL while in guest context, and Xen's + * choice (set or clear, depending on the hardware) while running in Xen + * context. Therefore, a simplistic algorithm is: + * + * - Set/clear IBRS on entry to Xen + * - Set the guests' choice on exit to guest + * - Leave SPEC_CTRL unchanged on exit to xen + * + * There are two complicating factors: + * 1) HVM guests can have direct access to the MSR, so it can change + * behind Xen's back. + * 2) An NMI or MCE can interrupt at any point, including early in the entry + * path, or late in the exit path after restoring the guest value. This + * will corrupt the guest value. + * + * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately + * after VMEXIT.
The VMEXIT-specific code reads MSR_SPEC_CTRL and updates + * current before loading Xen's MSR_SPEC_CTRL setting. + * + * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and + * use_shadow_spec_ctrl boolean per cpu. The synchronous use is: + * + * 1) Store guest value in shadow_spec_ctrl + * 2) Set use_shadow_spec_ctrl boolean + * 3) Load guest value into MSR_SPEC_CTRL + * 4) Exit to guest + * 5) Entry from guest + * 6) Clear use_shadow_spec_ctrl boolean + * 7) Load Xen's value into MSR_SPEC_CTRL + * + * The asynchronous use for interrupts/exceptions is: + * - Set/clear IBRS on entry to Xen + * - On exit to Xen, check use_shadow_spec_ctrl + * - If set, load shadow_spec_ctrl + * + * Therefore, an interrupt/exception which hits the synchronous path between + * steps 2 and 6 will restore the shadow value rather than leaving Xen's value + * loaded and corrupting the value used in guest context. + * + * The following ASM fragments implement this algorithm. See their local + * comments for further details. + * - SPEC_CTRL_ENTRY_FROM_VMEXIT + * - SPEC_CTRL_ENTRY_FROM_PV + * - SPEC_CTRL_ENTRY_FROM_INTR + * - SPEC_CTRL_EXIT_TO_XEN + * - SPEC_CTRL_EXIT_TO_GUEST + */ + +.macro DO_OVERWRITE_RSB +/* + * Requires nothing + * Clobbers %rax, %rcx + * + * Requires 256 bytes of stack space, but %rsp has no net change. Based on + * Google's performance numbers, the loop is unrolled to 16 iterations and two + * calls per iteration. + * + * The call filling the RSB needs a nonzero displacement. A nop would do, but + * we use "1: pause; lfence; jmp 1b" to safely contains any ret-based + * speculation, even if the loop is speculatively executed prematurely. + * + * %rsp is preserved by using an extra GPR because a) we've got plenty spare, + * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) can be + * optimised with mov-elimination in modern cores. + */ + mov $16, %ecx /* 16 iterations, two calls per loop */ + mov %rsp, %rax /* Store the current %rsp */ + +.L\@_fill_rsb_loop: + + .irp n, 1, 2 /* Unrolled twice. */ + call .L\@_insert_rsb_entry_\n /* Create an RSB entry. */ + +.L\@_capture_speculation_\n: + pause + lfence + jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */ + +.L\@_insert_rsb_entry_\n: + .endr + + sub $1, %ecx + jnz .L\@_fill_rsb_loop + mov %rax, %rsp /* Restore old %rsp */ +.endm + +.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req +/* + * Requires %rbx=current, %rsp=regs/cpuinfo + * Clobbers %rax, %rcx, %rdx + * + * The common case is that a guest has direct access to MSR_SPEC_CTRL, at + * which point we need to save the guest value before setting IBRS for Xen. + * Unilaterally saving the guest value is shorter and faster than checking. + */ + mov $MSR_SPEC_CTRL, %ecx + rdmsr + + /* Stash the value from hardware. */ + mov %eax, VCPU_arch_spec_ctrl(%rbx) + xor %edx, %edx + + /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */ + movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) + + /* Load Xen's intended value. */ + mov $\ibrs_val, %eax + wrmsr +.endm + +.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req +/* + * Requires %rsp=regs (also cpuinfo if !maybexen) + * Requires %r14=stack_end (if maybexen) + * Clobbers %rax, %rcx, %rdx + * + * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read + * it back. Entries from guest context need to clear SPEC_CTRL shadowing, + * while entries from Xen must leave shadowing in its current state. 
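Steps 1-3 and 6-7 of the protocol described above carry a strict ordering requirement: the shadow value and the shadowing flag must be in place before the MSR is written, so that an NMI/MCE landing at any instant sees a consistent pair. The same sequence in plain C, purely as illustration (wrmsr_spec_ctrl() is a stand-in for the real WRMSR; the asm macros in this header are the actual implementation):

    extern void wrmsr_spec_ctrl(unsigned int val);   /* stand-in for WRMSR */

    struct sketch_shadow {
        unsigned int shadow_spec_ctrl;
        volatile unsigned char use_shadow_spec_ctrl;
    };

    static void sketch_exit_to_guest(struct sketch_shadow *s, unsigned int guest)
    {
        s->shadow_spec_ctrl = guest;       /* 1) latch the guest's value    */
        s->use_shadow_spec_ctrl = 1;       /* 2) arm shadowing              */
        wrmsr_spec_ctrl(guest);            /* 3) load it into MSR_SPEC_CTRL */
        /* 4) return to guest; an async entry now reloads shadow_spec_ctrl */
    }

    static void sketch_entry_from_guest(struct sketch_shadow *s, unsigned int xen)
    {
        s->use_shadow_spec_ctrl = 0;       /* 6) disarm shadowing           */
        wrmsr_spec_ctrl(xen);              /* 7) load Xen's value           */
    }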
+ */ + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx + + /* + * Clear SPEC_CTRL shadowing *before* loading Xen's value. If entering + * from a possibly-xen context, %rsp doesn't necessarily alias the cpuinfo + * block so calculate the position directly. + */ + .if \maybexen + /* Branchless `if ( !xen ) clear_shadowing` */ + testb $3, UREGS_cs(%rsp) + setz %al + and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) + .else + movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) + .endif + + /* Load Xen's intended value. */ + mov $\ibrs_val, %eax + wrmsr +.endm + +.macro DO_SPEC_CTRL_EXIT_TO_XEN +/* + * Requires %rbx=stack_end + * Clobbers %rax, %rcx, %rdx + * + * When returning to Xen context, look to see whether SPEC_CTRL shadowing is + * in effect, and reload the shadow value. This covers race conditions which + * exist with an NMI/MCE/etc hitting late in the return-to-guest path. + */ + xor %edx, %edx + + cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx) + je .L\@_skip + + mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax + mov $MSR_SPEC_CTRL, %ecx + wrmsr + +.L\@_skip: +.endm + +.macro DO_SPEC_CTRL_EXIT_TO_GUEST +/* + * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo + * Clobbers %rcx, %rdx + * + * When returning to guest context, set up SPEC_CTRL shadowing and load the + * guest value. + */ + /* Set up shadow value *before* enabling shadowing. */ + mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) + + /* Set SPEC_CTRL shadowing *before* loading the guest value. */ + movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp) + + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr +.endm + +/* Use after a VMEXIT from an HVM guest. */ +#define SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \ + ALTERNATIVE_2 __stringify(ASM_NOP32), \ + __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ + __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ibrs_val=0), \ + X86_FEATURE_XEN_IBRS_CLEAR + +/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ +#define SPEC_CTRL_ENTRY_FROM_PV \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ + ALTERNATIVE_2 __stringify(ASM_NOP21), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0), \ + X86_FEATURE_XEN_IBRS_CLEAR + +/* Use in interrupt/exception context. May interrupt Xen or PV context. */ +#define SPEC_CTRL_ENTRY_FROM_INTR \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ + ALTERNATIVE_2 __stringify(ASM_NOP29), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0), \ + X86_FEATURE_XEN_IBRS_CLEAR + +/* Use when exiting to Xen context. */ +#define SPEC_CTRL_EXIT_TO_XEN \ + ALTERNATIVE_2 __stringify(ASM_NOP17), \ + DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET, \ + DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR + +/* Use when exiting to guest context. */ +#define SPEC_CTRL_EXIT_TO_GUEST \ + ALTERNATIVE_2 __stringify(ASM_NOP24), \ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR + +/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. 
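Each SPEC_CTRL_* block above starts life as NOP padding (ASM_NOP21/24/29/32/40) sized to its largest replacement; at boot the alternatives machinery overwrites the NOPs in place with whichever variant's synthetic feature bit ended up set. A simplified sketch of that patching pass, reusing the alt_instr layout sketched earlier (the real apply_alternatives() also re-pads the tail and fixes up relative branches):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    extern bool sketch_cpu_has(unsigned int feature);  /* stand-in predicate */

    static void sketch_apply_alternatives(struct alt_instr *a, unsigned int n)
    {
        for ( unsigned int i = 0; i < n; ++i, ++a )
        {
            uint8_t *orig = (uint8_t *)&a->orig_offset + a->orig_offset;
            const uint8_t *repl = (const uint8_t *)&a->repl_offset +
                                  a->repl_offset;

            if ( !sketch_cpu_has(a->feature) )
                continue;                      /* leave the NOPs in place    */

            memcpy(orig, repl, a->repl_len);   /* trailing bytes remain NOPs */
        }
    }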
*/ +.macro SPEC_CTRL_ENTRY_FROM_INTR_IST +/* + * Requires %rsp=regs, %r14=stack_end + * Clobbers %rax, %rcx, %rdx + * + * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY + * maybexen=1, but with conditionals rather than alternatives. + */ + movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax + + testb $BTI_IST_RSB, %al + jz .L\@_skip_rsb + + DO_OVERWRITE_RSB + +.L\@_skip_rsb: + + testb $BTI_IST_WRMSR, %al + jz .L\@_skip_wrmsr + + xor %edx, %edx + testb $3, UREGS_cs(%rsp) + setz %dl + and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) + +.L\@_entry_from_xen: + /* + * Load Xen's intended value. SPEC_CTRL_IBRS vs 0 is encoded in the + * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS. + */ + mov $MSR_SPEC_CTRL, %ecx + and $BTI_IST_IBRS, %eax + wrmsr + + /* Opencoded UNLIKELY_START() with no condition. */ +UNLIKELY_DISPATCH_LABEL(\@_serialise): + .subsection 1 + /* + * In the case that we might need to set SPEC_CTRL.IBRS for safety, we + * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr` + * to speculate around the WRMSR. As a result, we need a dispatch + * serialising instruction in the else clause. + */ +.L\@_skip_wrmsr: + lfence + UNLIKELY_END(\@_serialise) +.endm + +.macro SPEC_CTRL_EXIT_TO_XEN_IST +/* + * Requires %rbx=stack_end + * Clobbers %rax, %rcx, %rdx + */ + testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx) + jz .L\@_skip + + DO_SPEC_CTRL_EXIT_TO_XEN + +.L\@_skip: +.endm + +#endif /* __ASSEMBLY__ */ +#endif /* !__X86_SPEC_CTRL_ASM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/xen/include/asm-x86/spec_ctrl.h xen-4.9.2/xen/include/asm-x86/spec_ctrl.h --- xen-4.9.0/xen/include/asm-x86/spec_ctrl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/spec_ctrl.h 2018-03-28 13:10:55.000000000 +0000 @@ -0,0 +1,82 @@ +/****************************************************************************** + * include/asm-x86/spec_ctrl.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (c) 2017-2018 Citrix Systems Ltd. + */ + +#ifndef __X86_SPEC_CTRL_H__ +#define __X86_SPEC_CTRL_H__ + +#include <asm/alternative.h> +#include <asm/current.h> +#include <asm/msr-index.h> + +void init_speculation_mitigations(void); + +extern bool opt_ibpb; +extern uint8_t default_bti_ist_info; + +static inline void init_shadow_spec_ctrl_state(void) +{ + struct cpu_info *info = get_cpu_info(); + + info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; + info->bti_ist_info = default_bti_ist_info; +} + +/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ +static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) +{ + uint32_t val = 0; + + /* + * Latch the new shadow value, then enable shadowing, then update the MSR. + * There are no SMP issues here; only local processor ordering concerns.
+ */ + info->shadow_spec_ctrl = val; + barrier(); + info->use_shadow_spec_ctrl = true; + barrier(); + asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); +} + +/* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ +static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) +{ + uint32_t val = SPEC_CTRL_IBRS; + + /* + * Disable shadowing before updating the MSR. There are no SMP issues + * here; only local processor ordering concerns. + */ + info->use_shadow_spec_ctrl = false; + barrier(); + asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); +} + +#endif /* !__X86_SPEC_CTRL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.9.0/xen/include/asm-x86/x86_64/page.h xen-4.9.2/xen/include/asm-x86/x86_64/page.h --- xen-4.9.0/xen/include/asm-x86/x86_64/page.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/asm-x86/x86_64/page.h 2018-03-28 13:10:55.000000000 +0000 @@ -25,8 +25,11 @@ /* These are architectural limits. Current CPUs support only 40-bit phys. */ #define PADDR_BITS 52 #define VADDR_BITS 48 -#define PADDR_MASK ((1UL << PADDR_BITS)-1) -#define VADDR_MASK ((1UL << VADDR_BITS)-1) +#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1) +#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1) + +#define VADDR_TOP_BIT (1UL << (VADDR_BITS - 1)) +#define CANONICAL_MASK (~0UL & ~VADDR_MASK) #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63)) @@ -116,6 +119,7 @@ : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \ ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT))) +#define root_table_offset l4_table_offset #define root_get_pfn l4e_get_pfn #define root_get_flags l4e_get_flags #define root_get_intpte l4e_get_intpte diff -Nru xen-4.9.0/xen/include/public/arch-x86/cpufeatureset.h xen-4.9.2/xen/include/public/arch-x86/cpufeatureset.h --- xen-4.9.0/xen/include/public/arch-x86/cpufeatureset.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/public/arch-x86/cpufeatureset.h 2018-03-28 13:10:55.000000000 +0000 @@ -236,10 +236,13 @@ /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ +XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */ /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ -XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ -XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ +XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ +XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ +XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ +XEN_CPUFEATURE(STIBP, 9*32+27) /*A! STIBP */ #endif /* XEN_CPUFEATURE */ diff -Nru xen-4.9.0/xen/include/public/arch-x86/xen-x86_64.h xen-4.9.2/xen/include/public/arch-x86/xen-x86_64.h --- xen-4.9.0/xen/include/public/arch-x86/xen-x86_64.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/public/arch-x86/xen-x86_64.h 2018-03-28 13:10:55.000000000 +0000 @@ -203,8 +203,8 @@ uint16_t ss, _pad2[3]; uint16_t es, _pad3[3]; uint16_t ds, _pad4[3]; - uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. 
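spec_ctrl_enter_idle()/spec_ctrl_exit_idle() above let the idle loop drop IBRS to 0 while sleeping and restore it before executing indirect-branch-bearing code again; the WARNING comments are literal, since between the two calls `ret` and indirect branches are unprotected. An illustrative call pattern, assuming the declarations above are in scope (mwait_idle() is a stand-in for Xen's real idle drivers):

    extern void mwait_idle(void);    /* stand-in idle routine */

    static void sketch_idle(void)
    {
        struct cpu_info *info = get_cpu_info();

        spec_ctrl_enter_idle(info);  /* IBRS off; `ret`/`call *` unsafe now */
        mwait_idle();
        spec_ctrl_exit_idle(info);   /* IBRS back on before indirect code  */
    }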
*/ - uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ + uint16_t fs, _pad5[3]; /* Non-nul => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-nul => takes precedence over gs_base_user. */ }; typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); diff -Nru xen-4.9.0/xen/include/xen/grant_table.h xen-4.9.2/xen/include/xen/grant_table.h --- xen-4.9.0/xen/include/xen/grant_table.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/xen/grant_table.h 2018-03-28 13:10:55.000000000 +0000 @@ -78,7 +78,7 @@ /* Mapping tracking table per vcpu. */ struct grant_mapping **maptrack; unsigned int maptrack_limit; - /* Lock protecting the maptrack page list, head, and limit */ + /* Lock protecting the maptrack limit */ spinlock_t maptrack_lock; /* The defined versions are 1 and 2. Set to 0 if we don't know what version to use yet. */ diff -Nru xen-4.9.0/xen/include/xen/lib.h xen-4.9.2/xen/include/xen/lib.h --- xen-4.9.0/xen/include/xen/lib.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/xen/lib.h 2018-03-28 13:10:55.000000000 +0000 @@ -73,6 +73,13 @@ void cmdline_parse(const char *cmdline); int parse_bool(const char *s); +/** + * Given a specific name, parses a string of the form: + * [no-]$NAME[=...] + * returning 0 or 1 for a recognised boolean, or -1 for an error. + */ +int parse_boolean(const char *name, const char *s, const char *e); + /*#define DEBUG_TRACE_DUMP*/ #ifdef DEBUG_TRACE_DUMP extern void debugtrace_dump(void); diff -Nru xen-4.9.0/xen/include/xen/pci.h xen-4.9.2/xen/include/xen/pci.h --- xen-4.9.0/xen/include/xen/pci.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/xen/pci.h 2018-03-28 13:10:55.000000000 +0000 @@ -39,6 +39,10 @@ #define PCI_SBDF3(s,b,df) ((((s) & 0xffff) << 16) | PCI_BDF2(b, df)) struct pci_dev_info { + /* + * VF's 'is_extfn' field is used to indicate whether its PF is an extended + * function. + */ bool_t is_extfn; bool_t is_virtfn; struct { diff -Nru xen-4.9.0/xen/include/xen/sched.h xen-4.9.2/xen/include/xen/sched.h --- xen-4.9.0/xen/include/xen/sched.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/xen/sched.h 2018-03-28 13:10:55.000000000 +0000 @@ -230,6 +230,7 @@ int controller_pause_count; /* Grant table map tracking. */ + spinlock_t maptrack_freelist_lock; unsigned int maptrack_head; unsigned int maptrack_tail; diff -Nru xen-4.9.0/xen/include/xsm/dummy.h xen-4.9.2/xen/include/xsm/dummy.h --- xen-4.9.0/xen/include/xsm/dummy.h 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/include/xsm/dummy.h 2018-03-28 13:10:55.000000000 +0000 @@ -453,7 +453,7 @@ static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d) { - XSM_ASSERT_ACTION(XSM_TARGET); + XSM_ASSERT_ACTION(XSM_DM_PRIV); return xsm_default_action(action, current->domain, d); } @@ -465,7 +465,7 @@ static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d) { - XSM_ASSERT_ACTION(XSM_TARGET); + XSM_ASSERT_ACTION(XSM_DM_PRIV); return xsm_default_action(action, current->domain, d); } diff -Nru xen-4.9.0/xen/Makefile xen-4.9.2/xen/Makefile --- xen-4.9.0/xen/Makefile 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/Makefile 2018-03-28 13:10:55.000000000 +0000 @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. 
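The parse_boolean() contract documented above accepts one item out of a comma-separated option string: "name" and "name=1" yield 1, "no-name" and "name=0" yield 0, and anything unrecognised yields -1. A hedged usage sketch (the "widget" option and opt_widget variable are hypothetical):

    #include <string.h>

    extern int parse_boolean(const char *name, const char *s, const char *e);

    static int opt_widget = 1;

    static void parse_widget_opts(const char *s)
    {
        const char *ss;

        do {
            ss = strchr(s, ',');
            if ( !ss )
                ss = s + strlen(s);

            switch ( parse_boolean("widget", s, ss) )
            {
            case 0: opt_widget = 0; break;   /* "no-widget" / "widget=0" */
            case 1: opt_widget = 1; break;   /* "widget" / "widget=1"    */
            default: break;                  /* not ours / unrecognised  */
            }

            s = ss + 1;
        } while ( *ss );
    }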
export XEN_VERSION = 4 export XEN_SUBVERSION = 9 -export XEN_EXTRAVERSION ?= .0$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff -Nru xen-4.9.0/xen/Rules.mk xen-4.9.2/xen/Rules.mk --- xen-4.9.0/xen/Rules.mk 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/Rules.mk 2018-03-28 13:10:55.000000000 +0000 @@ -66,8 +66,8 @@ AFLAGS-y += -D__ASSEMBLY__ -# Clang's built-in assembler can't handle .code16/.code32/.code64 yet -AFLAGS-$(clang) += -no-integrated-as +# Clang's built-in assembler can't handle embedded .include's +CFLAGS-$(clang) += -no-integrated-as ALL_OBJS := $(ALL_OBJS-y) diff -Nru xen-4.9.0/xen/tools/gen-cpuid.py xen-4.9.2/xen/tools/gen-cpuid.py --- xen-4.9.0/xen/tools/gen-cpuid.py 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/tools/gen-cpuid.py 2018-03-28 13:10:55.000000000 +0000 @@ -256,6 +256,11 @@ AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD, AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, AVX512_4FMAPS, AVX512_VPOPCNTDQ], + + # Single Thread Indirect Branch Predictors enumerates a new bit in the + # MSR enumerated by Indirect Branch Restricted Speculation/Indirect + # Branch Prediction Barrier enumeration. + IBRSB: [STIBP], } deep_features = tuple(sorted(deps.keys())) diff -Nru xen-4.9.0/xen/xsm/flask/hooks.c xen-4.9.2/xen/xsm/flask/hooks.c --- xen-4.9.0/xen/xsm/flask/hooks.c 2017-06-27 18:13:19.000000000 +0000 +++ xen-4.9.2/xen/xsm/flask/hooks.c 2018-03-28 13:10:55.000000000 +0000 @@ -918,8 +918,8 @@ u32 *sid, struct avc_audit_data *ad) { #ifdef CONFIG_HAS_PCI - struct msi_info *msi = data; - u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn; + const struct pci_dev *pdev = data; + u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn; AVC_AUDIT_DATA_INIT(ad, DEV); ad->device = machine_bdf;
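The gen-cpuid.py hunk above records STIBP as a deep dependency of IBRSB, so hiding IBRSB from a guest's featureset must also hide STIBP. A C sketch of how such a generated dependency table is typically applied (the names and sizes here are illustrative, not Xen's generated ones; a full implementation also iterates until the featureset stops changing):

    #include <stdint.h>

    #define FEATURESET_WORDS 10   /* illustrative; the real count is generated */

    struct deep_dep {
        uint32_t feature;                      /* bit number of the parent    */
        uint32_t disabled[FEATURESET_WORDS];   /* dependents to clear with it */
    };

    /* One pass: any feature whose parent bit is clear gets cleared too. */
    static void sketch_sanitise(uint32_t fs[FEATURESET_WORDS],
                                const struct deep_dep *deps, unsigned int n)
    {
        for ( unsigned int i = 0; i < n; ++i )
            if ( !(fs[deps[i].feature / 32] & (1u << (deps[i].feature % 32))) )
                for ( unsigned int w = 0; w < FEATURESET_WORDS; ++w )
                    fs[w] &= ~deps[i].disabled[w];
    }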