diff -Nru xen-4.6.0/config/Tools.mk.in xen-4.6.5/config/Tools.mk.in --- xen-4.6.0/config/Tools.mk.in 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/config/Tools.mk.in 2017-03-07 16:19:05.000000000 +0000 @@ -5,9 +5,6 @@ XEN_OS := NetBSDRump endif -# A debug build of tools? -debug := @debug@ - # Tools path BISON := @BISON@ FLEX := @FLEX@ diff -Nru xen-4.6.0/Config.mk xen-4.6.5/Config.mk --- xen-4.6.0/Config.mk 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/Config.mk 2017-03-07 16:19:05.000000000 +0000 @@ -242,22 +242,24 @@ ifeq ($(GIT_HTTP),y) OVMF_UPSTREAM_URL ?= http://xenbits.xen.org/git-http/ovmf.git -QEMU_UPSTREAM_URL ?= http://xenbits.xen.org/git-http/qemu-upstream-4.6-testing.git -QEMU_TRADITIONAL_URL ?= http://xenbits.xen.org/git-http/qemu-xen-4.6-testing.git +QEMU_UPSTREAM_URL ?= http://xenbits.xen.org/git-http/qemu-xen.git +QEMU_TRADITIONAL_URL ?= http://xenbits.xen.org/git-http/qemu-xen-traditional.git SEABIOS_UPSTREAM_URL ?= http://xenbits.xen.org/git-http/seabios.git MINIOS_UPSTREAM_URL ?= http://xenbits.xen.org/git-http/mini-os.git else OVMF_UPSTREAM_URL ?= git://xenbits.xen.org/ovmf.git -QEMU_UPSTREAM_URL ?= git://xenbits.xen.org/qemu-upstream-4.6-testing.git -QEMU_TRADITIONAL_URL ?= git://xenbits.xen.org/qemu-xen-4.6-testing.git +QEMU_UPSTREAM_URL ?= git://xenbits.xen.org/qemu-xen.git +QEMU_TRADITIONAL_URL ?= git://xenbits.xen.org/qemu-xen-traditional.git SEABIOS_UPSTREAM_URL ?= git://xenbits.xen.org/seabios.git MINIOS_UPSTREAM_URL ?= git://xenbits.xen.org/mini-os.git endif -OVMF_UPSTREAM_REVISION ?= cb9a7ebabcd6b8a49dc0854b2f9592d732b5afbd -QEMU_UPSTREAM_REVISION ?= qemu-xen-4.6.0 -MINIOS_UPSTREAM_REVISION ?= xen-RELEASE-4.6.0 -# Fri Jun 26 11:58:40 2015 +0100 -# Correct printf formatting for tpm_tis message. +OVMF_UPSTREAM_REVISION ?= 52a99493cce88a9d4ec8a02d7f1bd1a1001ce60d +QEMU_UPSTREAM_REVISION ?= qemu-xen-4.6.5 +# Wed Sep 16 17:38:44 2015 +0200 +# trace: remove malloc tracing +MINIOS_UPSTREAM_REVISION ?= xen-RELEASE-4.6.5 +# Fri May 13 15:21:10 2016 +0100 +# lib/sys.c: enclose file_types in define guards SEABIOS_UPSTREAM_REVISION ?= rel-1.8.2 # Tue Mar 17 10:52:16 2015 -0400 @@ -266,9 +268,9 @@ ETHERBOOT_NICS ?= rtl8139 8086100e -QEMU_TRADITIONAL_REVISION ?= xen-4.6.0 -# Tue Sep 8 15:41:20 2015 +0100 -# Fix build after "ui/vnc: limit client_cut_text msg payload size" +QEMU_TRADITIONAL_REVISION ?= xen-4.6.5 +# Wed Feb 22 16:25:19 2017 +0000 +# cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo # Specify which qemu-dm to use. This may be `ioemu' to use the old # Mercurial in-tree version, or a local directory, or a git URL. 
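An aside on the Config.mk hunk above: every pin uses GNU make's `?=` (assign only if unset), so each revision is merely a default that a packager can still override from the environment, and the comment pair under each pin records the date and subject of the commit being pinned. The new QEMU_TRADITIONAL_REVISION pin, per its subject line, pulls in a cirrus blitter hardening fix. As a rough sketch of that class of fix (invented types and names, not QEMU's actual code), a CPU-to-video blit must validate the guest-supplied destination geometry against VRAM bounds before copying anything:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Invented blit descriptor -- illustrative only, not QEMU's cirrus state. */
    struct blit {
        size_t dst_off;  /* guest-chosen start offset into VRAM */
        int    width;    /* bytes per line */
        int    height;   /* number of lines */
        int    pitch;    /* bytes between successive line starts */
    };

    /* True if any written byte would land outside VRAM, or if the
     * geometry overflows; must be checked before touching memory. */
    static bool blit_unsafe(const struct blit *b, size_t vram_size)
    {
        if (b->width <= 0 || b->height <= 0 || b->pitch < b->width)
            return true;
        size_t end = b->dst_off
                   + (size_t)(b->height - 1) * (size_t)b->pitch
                   + (size_t)b->width;
        return end < b->dst_off || end > vram_size;
    }

    static void blit_cpu_to_video(uint8_t *vram, size_t vram_size,
                                  const struct blit *b, const uint8_t *src)
    {
        if (blit_unsafe(b, vram_size))
            return;  /* reject guest-controlled out-of-bounds geometry */
        for (int y = 0; y < b->height; y++)
            memcpy(vram + b->dst_off + (size_t)y * (size_t)b->pitch,
                   src + (size_t)y * (size_t)b->width,
                   (size_t)b->width);
    }

The key property is that the check runs before any byte is written and rejects both out-of-range and integer-overflowing geometries.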
diff -Nru xen-4.6.0/debian/changelog xen-4.6.5/debian/changelog --- xen-4.6.0/debian/changelog 2017-01-10 14:07:08.000000000 +0000 +++ xen-4.6.5/debian/changelog 2017-05-09 13:30:26.000000000 +0000 @@ -1,3 +1,145 @@ +xen (4.6.5-0ubuntu1.1) xenial-security; urgency=low + + * Applying Xen Security Advisories: + - XSA-206 + * xenstored: apply a write transaction rate limit + * xenstored: Log when the write transaction rate limit bites + * oxenstored: refactor putting response on wire + * oxenstored: remove some unused parameters + * oxenstored: refactor request processing + * oxenstored: keep track of each transaction's operations + * oxenstored: move functions that process simple operations + * oxenstored: replay transaction upon conflict + * oxenstored: log request and response during transaction replay + * oxenstored: allow compilation prior to OCaml 3.12.0 + * oxenstored: comments explaining some variables + * oxenstored: handling of domain conflict-credit + * oxenstored: ignore domains with no conflict-credit + * oxenstored: add transaction info relevant to history-tracking + * oxenstored: support commit history tracking + * oxenstored: only record operations with side-effects in history + * oxenstored: discard old commit-history on txn end + * oxenstored: track commit history + * oxenstored: blame the connection that caused a transaction conflict + * oxenstored: allow self-conflicts + * oxenstored: do not commit read-only transactions + * oxenstored: don't wake to issue no conflict-credit + * oxenstored transaction conflicts: improve logging + * oxenstored: trim history in the frequent_ops function + - CVE-2017-7228 / XSA-212 + * memory: properly check guest memory ranges in XENMEM_exchange handling + - XSA-213 + * multicall: deal with early exit conditions + - XSA-214 + * x86: discard type information when stealing pages + - XSA-215 + * x86: correct create_bounce_frame + + -- Stefan Bader Tue, 09 May 2017 15:09:37 +0200 + +xen (4.6.5-0ubuntu1) xenial; urgency=medium + + * Rebasing to upstream stable release 4.6.5 (LP: #1671864) + https://www.xenproject.org/downloads/xen-archives/xen-46-series.html + - Includes fix for booting 4.10 Linux kernels in HVM guests on Intel + hosts which support the TSC_ADJUST MSR (LP: #1671760) + - Additional security relevant changes: + * CVE-2013-2076 / XSA-052 (update) + - Information leak on XSAVE/XRSTOR capable AMD CPUs + * CVE-2016-7093 / XSA-186 (4.6.3 became vulnerable) + - x86: Mishandling of instruction pointer truncation during emulation + * XSA-207 + - memory leak when destroying guest without PT devices + - Replacing the following security fixes with the versions from the + stable update: + * CVE-2015-7812 / XSA-145 + - arm: Host crash when preempting a multicall + * CVE-2015-7813 / XSA-146 + - arm: various unimplemented hypercalls log without rate limiting + * CVE-2015-7814 / XSA-147 + - arm: Race between domain destruction and memory allocation decrease + * CVE-2015-7835 / XSA-148 + - x86: Uncontrolled creation of large page mappings by PV guests + * CVE-2015-7969 / XSA-149, XSA-151 + - leak of main per-domain vcpu pointer array + - x86: leak of per-domain profiling-related vcpu pointer array + * CVE-2015-7970 / XSA-150 + - x86: Long latency populate-on-demand operation is not preemptible + * CVE-2015-7971 / XSA-152 + - x86: some pmu and profiling hypercalls log without rate limiting + * CVE-2015-7972 / XSA-153 + - x86: populate-on-demand balloon size inaccuracy can crash guests + * CVE-2016-2270 / XSA-154 + - x86: inconsistent cachability 
flags on guest mappings + * CVE-2015-8550 / XSA-155 + - paravirtualized drivers incautious about shared memory contents + * CVE-2015-5307, CVE-2015-8104 / XSA-156 + - x86: CPU lockup during exception delivery + * CVE-2015-8338 / XSA-158 + - long running memory operations on ARM + * CVE-2015-8339, CVE-2015-8340 / XSA-159 + XENMEM_exchange error handling issues + * CVE-2015-8341 / XSA-160 + - libxl leak of pv kernel and initrd on error + * CVE-2015-8555 / XSA-165 + - information leak in legacy x86 FPU/XMM initialization + * XSA-166 + - ioreq handling possibly susceptible to multiple read issue + * CVE-2016-1570 / XSA-167 + - PV superpage functionality missing sanity checks + * CVE-2016-1571 / XSA-168 + - VMX: intercept issue with INVLPG on non-canonical address + * CVE-2015-8615 / XSA-169 + - x86: unintentional logging upon guest changing callback method + * CVE-2016-2271 / XSA-170 + - VMX: guest user mode may crash guest with non-canonical RIP + * CVE-2016-3158, CVE-2016-3159 / XSA-172 + - broken AMD FPU FIP/FDP/FOP leak workaround + * CVE-2016-3960 / XSA-173 + - x86 shadow pagetables: address width overflow + * CVE-2016-4962 / XSA-175 + - Unsanitised guest input in libxl device handling code + * CVE-2016-4480 / XSA-176 + - x86 software guest page walk PS bit handling flaw + * CVE-2016-4963 / XSA-178 + - Unsanitised driver domain input in libxl device handling + * CVE-2016-5242 / XSA-181 + - arm: Host crash caused by VMID exhaustion + * CVE-2016-6258 / XSA-182 + - x86: Privilege escalation in PV guests + * CVE-2016-6259 / XSA-183 + - x86: Missing SMAP whitelisting in 32-bit exception / event delivery + * CVE-2016-7092 / XSA-185 + - x86: Disallow L3 recursive pagetable for 32-bit PV guests + * CVE-2016-7094 / XSA-187 + - x86 HVM: Overflow of sh_ctxt->seg_reg[] + * CVE-2016-7777 / XSA-190 + - CR0.TS and CR0.EM not always honored for x86 HVM guests + * CVE-2016-9386 / XSA-191 + - x86 null segments not always treated as unusable + * CVE-2016-9382 / XSA-192 + - x86 task switch to VM86 mode mis-handled + * CVE-2016-9385 / XSA-193 + - x86 segment base write emulation lacking canonical address checks + * CVE-2016-9383 / XSA-195 + - x86 64-bit bit test instruction emulation broken + * CVE-2016-9377, CVE-2016-9378 / XSA-196 + - x86 software interrupt injection mis-handled + * CVE-2016-9379, CVE-2016-9380 / XSA-198 + - delimiter injection vulnerabilities in pygrub + * CVE-2016-9932 / XSA-200 + - x86 CMPXCHG8B emulation fails to ignore operand size override + * CVE-2016-9815, CVE-2016-9816, CVE-2016-9817, CVE-2016-9818 / XSA-201 + - ARM guests may induce host asynchronous abort + * CVE-2016-10024 / XSA-202 + - x86 PV guests may be able to mask interrupts + * CVE-2016-10025 / XSA-203 + - x86: missing NULL pointer check in VMFUNC emulation + * CVE-2016-10013 / XSA-204 + - x86: Mishandling of SYSCALL singlestep during emulation + + -- Stefan Bader Tue, 14 Mar 2017 16:08:39 +0100 + xen (4.6.0-1ubuntu4.3) xenial-security; urgency=low * Applying Xen Security Advisories: diff -Nru xen-4.6.0/debian/patches/CVE-2015-7812.diff xen-4.6.5/debian/patches/CVE-2015-7812.diff --- xen-4.6.0/debian/patches/CVE-2015-7812.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7812.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -From 1440439d8552e98995b91234480505c5eb154eb5 Mon Sep 17 00:00:00 2001 -From: Julien Grall -Date: Thu, 29 Oct 2015 13:46:45 +0100 -Subject: arm: Support hypercall_create_continuation for multicall - -Multicall for ARM has been supported since commit f0dbdc6 "xen: 
arm: fully -implement multicall interface.". Although, if an hypercall in multicall -requires preemption, it will crash the host: - -(XEN) Xen BUG at domain.c:347 -(XEN) ----[ Xen-4.7-unstable arm64 debug=y Tainted: C ]---- -[...] -(XEN) Xen call trace: -(XEN) [<00000000002420cc>] hypercall_create_continuation+0x64/0x380 (PC) -(XEN) [<0000000000217274>] do_memory_op+0x1b00/0x2334 (LR) -(XEN) [<0000000000250d2c>] do_multicall_call+0x114/0x124 -(XEN) [<0000000000217ff0>] do_multicall+0x17c/0x23c -(XEN) [<000000000024f97c>] do_trap_hypercall+0x90/0x12c -(XEN) [<0000000000251ca8>] do_trap_hypervisor+0xd2c/0x1ba4 -(XEN) [<00000000002582cc>] guest_sync+0x88/0xb8 -(XEN) -(XEN) -(XEN) **************************************** -(XEN) Panic on CPU 5: -(XEN) Xen BUG at domain.c:347 -(XEN) **************************************** -(XEN) -(XEN) Manual reset required ('noreboot' specified) - -Looking to the code, the support of multicall looks valid to me, as we only -need to fill call.args[...]. So drop the BUG(); - -This is CVE-2015-7812 / XSA-145. - -Signed-off-by: Julien Grall -Acked-by: Ian Campbell -master commit: 29bcf64ce8bc0b1b7aacd00c8668f255c4f0686c -master date: 2015-10-29 13:31:10 +0100 - -(cherry picked from commit ea95ecb8bf30f83b52a079cdfc824a3ba6ffd4ef) - -Patch-Name: CVE-2015-7812.diff ---- - xen/arch/arm/domain.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c -index b2bfc7d..b9a4226 100644 ---- a/xen/arch/arm/domain.c -+++ b/xen/arch/arm/domain.c -@@ -344,8 +344,6 @@ unsigned long hypercall_create_continuation( - - if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) - { -- BUG(); /* XXX multicalls not implemented yet. */ -- - __set_bit(_MCSF_call_preempted, &mcs->flags); - - for ( i = 0; *p != '\0'; i++ ) diff -Nru xen-4.6.0/debian/patches/CVE-2015-7813.diff xen-4.6.5/debian/patches/CVE-2015-7813.diff --- xen-4.6.0/debian/patches/CVE-2015-7813.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7813.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -From 2989d96e3d84f2e589133edf317b8aed2429f3c8 Mon Sep 17 00:00:00 2001 -From: Ian Campbell -Date: Thu, 29 Oct 2015 13:47:10 +0100 -Subject: arm: rate-limit logging from unimplemented PHYSDEVOP and HVMOP. - -These are guest accessible and should therefore be rate-limited. -Moreover, include them only in debug builds. - -This is CVE-2015-7813 / XSA-146. 
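An aside on the pattern enforced here: any log statement a guest can trigger at will is a console denial-of-service vector, which is why this fix replaces plain printk() with the rate-limited, debug-only gdprintk(). A minimal standalone sketch of interval-based rate limiting (an invented single-threaded helper, not Xen's implementation, which additionally tags the current domain and compiles out of non-debug builds):

    #include <stdio.h>
    #include <time.h>

    /* Allow at most RL_BURST messages per RL_WINDOW seconds, drop the rest. */
    #define RL_BURST  10
    #define RL_WINDOW 5

    static int rate_limited(void)
    {
        static time_t window_start;
        static unsigned int count;
        time_t now = time(NULL);

        if (now - window_start >= RL_WINDOW) {
            window_start = now;  /* start a fresh window */
            count = 0;
        }
        return ++count > RL_BURST;
    }

    #define rl_printf(...) \
        do { if (!rate_limited()) printf(__VA_ARGS__); } while (0)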
- -Signed-off-by: Ian Campbell -Reviewed-by: Jan Beulich -master commit: 1c0e59ff15764e7b0c59282365974f5b8924ce83 -master date: 2015-10-29 13:33:38 +0100 - -(cherry picked from commit b18d995ca341d07a38fec04aa137e9ef85ee4dd0) - -Patch-Name: CVE-2015-7813.diff ---- - xen/arch/arm/hvm.c | 2 +- - xen/arch/arm/physdev.c | 3 ++- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c -index 471c4cd..5fd0753 100644 ---- a/xen/arch/arm/hvm.c -+++ b/xen/arch/arm/hvm.c -@@ -57,7 +57,7 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) - - default: - { -- printk("%s: Bad HVM op %ld.\n", __func__, op); -+ gdprintk(XENLOG_DEBUG, "HVMOP op=%lu: not implemented\n", op); - rc = -ENOSYS; - break; - } -diff --git a/xen/arch/arm/physdev.c b/xen/arch/arm/physdev.c -index 61b4a18..27bbbda 100644 ---- a/xen/arch/arm/physdev.c -+++ b/xen/arch/arm/physdev.c -@@ -8,12 +8,13 @@ - #include - #include - #include -+#include - #include - - - int do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - { -- printk("%s %d cmd=%d: not implemented yet\n", __func__, __LINE__, cmd); -+ gdprintk(XENLOG_DEBUG, "PHYSDEVOP cmd=%d: not implemented\n", cmd); - return -ENOSYS; - } - diff -Nru xen-4.6.0/debian/patches/CVE-2015-7814.diff xen-4.6.5/debian/patches/CVE-2015-7814.diff --- xen-4.6.0/debian/patches/CVE-2015-7814.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7814.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -From 0361df69e22ae145cfdd95b0a3ea75a858a4bc08 Mon Sep 17 00:00:00 2001 -From: Ian Campbell -Date: Thu, 29 Oct 2015 13:47:38 +0100 -Subject: arm: handle races between relinquish_memory and free_domheap_pages - -Primarily this means XENMEM_decrease_reservation from a toolstack -domain. - -Unlike x86 we have no requirement right now to queue such pages onto -a separate list, if we hit this race then the other code has already -fully accepted responsibility for freeing this page and therefore -there is no more for relinquish_memory to do. - -This is CVE-2015-7814 / XSA-147. - -Signed-off-by: Ian Campbell -Reviewed-by: Julien Grall -Reviewed-by: Jan Beulich -master commit: 1ef01396fdff88b1c3331a09ca5c69619b90f4ea -master date: 2015-10-29 13:34:17 +0100 - -(cherry picked from commit df6fa370865717ee51530c0102d1e983a70d37c3) - -Patch-Name: CVE-2015-7814.diff ---- - xen/arch/arm/domain.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c -index b9a4226..20cc772 100644 ---- a/xen/arch/arm/domain.c -+++ b/xen/arch/arm/domain.c -@@ -768,8 +768,15 @@ static int relinquish_memory(struct domain *d, struct page_list_head *list) - { - /* Grab a reference to the page so it won't disappear from under us. */ - if ( unlikely(!get_page(page, d)) ) -- /* Couldn't get a reference -- someone is freeing this page. */ -- BUG(); -+ /* -+ * Couldn't get a reference -- someone is freeing this page and -+ * has already committed to doing so, so no more to do here. -+ * -+ * Note that the page must be left on the list, a list_del -+ * here will clash with the list_del done by the other -+ * party in the race and corrupt the list head. 
-+ */ -+ continue; - - if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) - put_page(page); diff -Nru xen-4.6.0/debian/patches/CVE-2015-7835.diff xen-4.6.5/debian/patches/CVE-2015-7835.diff --- xen-4.6.0/debian/patches/CVE-2015-7835.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7835.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,57 +0,0 @@ -From c270ef05942a8ce35d78064aaa82f4f8360aff27 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 29 Oct 2015 13:48:09 +0100 -Subject: x86: guard against undue super page PTE creation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When optional super page support got added (commit bd1cd81d64 "x86: PV -support for hugepages"), two adjustments were missed: mod_l2_entry() -needs to consider the PSE and RW bits when deciding whether to use the -fast path, and the PSE bit must not be removed from L2_DISALLOW_MASK -unconditionally. - -This is CVE-2015-7835 / XSA-148. - -Reported-by: "栾尚聪(好风)" -Signed-off-by: Jan Beulich -Reviewed-by: Tim Deegan -master commit: fe360c90ea13f309ef78810f1a2b92f2ae3b30b8 -master date: 2015-10-29 13:35:07 +0100 - -(cherry picked from commit 2d094bd87072e26ac29b07917d31fcbf13892288) - -Patch-Name: CVE-2015-7835.diff ---- - xen/arch/x86/mm.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 202ff76..fc65982 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -160,7 +160,10 @@ static void put_superpage(unsigned long mfn); - static uint32_t base_disallow_mask; - /* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ - #define L1_DISALLOW_MASK ((base_disallow_mask | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) --#define L2_DISALLOW_MASK (base_disallow_mask & ~_PAGE_PSE) -+ -+#define L2_DISALLOW_MASK (unlikely(opt_allow_superpage) \ -+ ? base_disallow_mask & ~_PAGE_PSE \ -+ : base_disallow_mask) - - #define l3_disallow_mask(d) (!is_pv_32bit_domain(d) ? \ - base_disallow_mask : 0xFFFFF198U) -@@ -1839,7 +1842,10 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, - } - - /* Fast path for identical mapping and presence. */ -- if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) ) -+ if ( !l2e_has_changed(ol2e, nl2e, -+ unlikely(opt_allow_superpage) -+ ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT -+ : _PAGE_PRESENT) ) - { - adjust_guest_l2e(nl2e, d); - if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) diff -Nru xen-4.6.0/debian/patches/CVE-2015-7969.1.diff xen-4.6.5/debian/patches/CVE-2015-7969.1.diff --- xen-4.6.0/debian/patches/CVE-2015-7969.1.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7969.1.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -From 967130a6c0a919da36281b69f600a31c22715b8c Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 29 Oct 2015 13:51:24 +0100 -Subject: xenoprof: free domain's vcpu array - -This was overlooked in fb442e2171 ("x86_64: allow more vCPU-s per -guest"). - -This is CVE-2015-7969 / XSA-151. 
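Both hunks of this fix follow the same rule: every allocation reachable from a partially built object must be released both on the error path and at teardown. A distilled version of the bug class (generic names, not the xenoprof code itself):

    #include <stdlib.h>

    struct prof {
        int  *vcpu;    /* per-vCPU array, allocated separately */
        char *rawbuf;  /* large sample buffer */
    };

    /* Build fully or fail cleanly. The bug class fixed above: when the
     * rawbuf allocation failed, only the struct was freed and ->vcpu
     * leaked; teardown had the matching omission. */
    static struct prof *prof_alloc(size_t nvcpu, size_t bufsz)
    {
        struct prof *p = calloc(1, sizeof(*p));
        if (p == NULL)
            return NULL;

        p->vcpu = calloc(nvcpu, sizeof(*p->vcpu));
        if (p->vcpu == NULL) {
            free(p);
            return NULL;
        }

        p->rawbuf = malloc(bufsz);
        if (p->rawbuf == NULL) {
            free(p->vcpu);   /* in effect, the line the fix adds */
            free(p);
            return NULL;
        }
        return p;
    }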
- -Signed-off-by: Jan Beulich -Reviewed-by: Ian Campbell -master commit: 6e97c4b37386c2d09e09e9b5d5d232e37728b960 -master date: 2015-10-29 13:36:52 +0100 - -(cherry picked from commit 429f0cd270851462783fc6d56d6bae9cbb40bdca) - -Patch-Name: CVE-2015-7969.1.diff ---- - xen/common/xenoprof.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/common/xenoprof.c b/xen/common/xenoprof.c -index 1061323..53a803a 100644 ---- a/xen/common/xenoprof.c -+++ b/xen/common/xenoprof.c -@@ -239,6 +239,7 @@ static int alloc_xenoprof_struct( - d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages), 0); - if ( d->xenoprof->rawbuf == NULL ) - { -+ xfree(d->xenoprof->vcpu); - xfree(d->xenoprof); - d->xenoprof = NULL; - return -ENOMEM; -@@ -286,6 +287,7 @@ void free_xenoprof_pages(struct domain *d) - free_xenheap_pages(x->rawbuf, order); - } - -+ xfree(x->vcpu); - xfree(x); - d->xenoprof = NULL; - } diff -Nru xen-4.6.0/debian/patches/CVE-2015-7969.diff xen-4.6.5/debian/patches/CVE-2015-7969.diff --- xen-4.6.0/debian/patches/CVE-2015-7969.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7969.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -From 91af405ccfba7200fec38d5c2624798794a0eb76 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 29 Oct 2015 13:49:56 +0100 -Subject: free domain's vcpu array - -This was overlooked in fb442e2171 ("x86_64: allow more vCPU-s per -guest"). - -This is CVE-2015-7969 / XSA-149. - -Reported-by: Ian Campbell -Signed-off-by: Jan Beulich -Reviewed-by: Ian Campbell -master commit: d46896ebbb23f3a9fef2eb6066ae614fd1acfd96 -master date: 2015-10-29 13:35:40 +0100 - -(cherry picked from commit 2c57108c36eaa10885b7d0daad534348717e4f9d) - -Patch-Name: CVE-2015-7969.diff ---- - xen/common/domain.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/xen/common/domain.c b/xen/common/domain.c -index 1b9fcfc..796c492 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -833,6 +833,7 @@ static void complete_domain_destroy(struct rcu_head *head) - - xsm_free_security_domain(d); - free_cpumask_var(d->domain_dirty_cpumask); -+ xfree(d->vcpu); - free_domain_struct(d); - - send_global_virq(VIRQ_DOM_EXC); diff -Nru xen-4.6.0/debian/patches/CVE-2015-7970.diff xen-4.6.5/debian/patches/CVE-2015-7970.diff --- xen-4.6.0/debian/patches/CVE-2015-7970.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7970.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,221 +0,0 @@ -From 86a812432587c0a6c415f935de3fb9838c0d1098 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 29 Oct 2015 13:50:59 +0100 -Subject: x86/PoD: Eager sweep for zeroed pages - -Based on the contents of a guests physical address space, -p2m_pod_emergency_sweep() could degrade into a linear memcmp() from 0 to -max_gfn, which runs non-preemptibly. - -As p2m_pod_emergency_sweep() runs behind the scenes in a number of contexts, -making it preemptible is not feasible. - -Instead, a different approach is taken. Recently-populated pages are eagerly -checked for reclaimation, which amortises the p2m_pod_emergency_sweep() -operation across each p2m_pod_demand_populate() operation. - -Note that in the case that a 2M superpage can't be reclaimed as a superpage, -it is shattered if 4K pages of zeros can be reclaimed. This is unfortunate -but matches the previous behaviour, and is required to avoid regressions -(domain crash from PoD exhaustion) with VMs configured close to the limit. - -This is CVE-2015-7970 / XSA-150. 
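The mechanism introduced below is a small fixed-size ring of recently populated GFNs (pod.mrp): each demand-populate records one entry, and each populate pass reclaims at least one, so the sweep cost is amortized across populate operations instead of one long non-preemptible scan. A reduced model of that ring (illustrative only; the real code also tags superpage entries and holds the PoD lock):

    #include <stddef.h>

    #define NR_MRP      32
    #define INVALID_GFN (~0UL)

    /* Fixed-size ring of the most recently populated GFNs. */
    struct mrp {
        unsigned long list[NR_MRP];
        unsigned int  idx;
    };

    static void mrp_record(struct mrp *m, unsigned long gfn)
    {
        m->list[m->idx++] = gfn;  /* overwrite the oldest slot */
        m->idx %= NR_MRP;
    }

    /* Always check one entry; keep scanning only while the pool is still
     * empty and entries remain, like pod_eager_reclaim()'s loop condition. */
    static void mrp_reclaim(struct mrp *m, int *pool_count,
                            int (*try_reclaim)(unsigned long gfn))
    {
        unsigned int i = 0;

        do {
            unsigned int slot = (m->idx + i++) % NR_MRP;

            if (m->list[slot] != INVALID_GFN) {
                *pool_count += try_reclaim(m->list[slot]);
                m->list[slot] = INVALID_GFN;
            }
        } while (*pool_count == 0 && i < NR_MRP);
    }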
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: George Dunlap -master commit: 101ce53266866144e724ed593173bc4098b300b9 -master date: 2015-10-29 13:36:25 +0100 - -(cherry picked from commit 4a32fbd95af6503ea1314ff2aa9a0b0a473d46c0) - -Patch-Name: CVE-2015-7970.diff ---- - xen/arch/x86/mm/p2m-pod.c | 86 +++++++++++++++++++++++++++++++---------------- - xen/arch/x86/mm/p2m.c | 4 +++ - xen/include/asm-x86/p2m.h | 18 +++++++--- - 3 files changed, 75 insertions(+), 33 deletions(-) - -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index 8156525..9196a5d 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -901,28 +901,6 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) - } - - #define POD_SWEEP_LIMIT 1024 -- --/* When populating a new superpage, look at recently populated superpages -- * hoping that they've been zeroed. This will snap up zeroed pages as soon as -- * the guest OS is done with them. */ --static void --p2m_pod_check_last_super(struct p2m_domain *p2m, unsigned long gfn_aligned) --{ -- unsigned long check_gfn; -- -- ASSERT(p2m->pod.last_populated_index < POD_HISTORY_MAX); -- -- check_gfn = p2m->pod.last_populated[p2m->pod.last_populated_index]; -- -- p2m->pod.last_populated[p2m->pod.last_populated_index] = gfn_aligned; -- -- p2m->pod.last_populated_index = -- ( p2m->pod.last_populated_index + 1 ) % POD_HISTORY_MAX; -- -- p2m_pod_zero_check_superpage(p2m, check_gfn); --} -- -- - #define POD_SWEEP_STRIDE 16 - static void - p2m_pod_emergency_sweep(struct p2m_domain *p2m) -@@ -963,7 +941,7 @@ p2m_pod_emergency_sweep(struct p2m_domain *p2m) - * NB that this is a zero-sum game; we're increasing our cache size - * by re-increasing our 'debt'. Since we hold the pod lock, - * (entry_count - count) must remain the same. */ -- if ( p2m->pod.count > 0 && i < limit ) -+ if ( i < limit && (p2m->pod.count > 0 || hypercall_preempt_check()) ) - break; - } - -@@ -975,6 +953,58 @@ p2m_pod_emergency_sweep(struct p2m_domain *p2m) - - } - -+static void pod_eager_reclaim(struct p2m_domain *p2m) -+{ -+ struct pod_mrp_list *mrp = &p2m->pod.mrp; -+ unsigned int i = 0; -+ -+ /* -+ * Always check one page for reclaimation. -+ * -+ * If the PoD pool is empty, keep checking some space is found, or all -+ * entries have been exhaused. -+ */ -+ do -+ { -+ unsigned int idx = (mrp->idx + i++) % ARRAY_SIZE(mrp->list); -+ unsigned long gfn = mrp->list[idx]; -+ -+ if ( gfn != INVALID_GFN ) -+ { -+ if ( gfn & POD_LAST_SUPERPAGE ) -+ { -+ gfn &= ~POD_LAST_SUPERPAGE; -+ -+ if ( p2m_pod_zero_check_superpage(p2m, gfn) == 0 ) -+ { -+ unsigned int x; -+ -+ for ( x = 0; x < SUPERPAGE_PAGES; ++x, ++gfn ) -+ p2m_pod_zero_check(p2m, &gfn, 1); -+ } -+ } -+ else -+ p2m_pod_zero_check(p2m, &gfn, 1); -+ -+ mrp->list[idx] = INVALID_GFN; -+ } -+ -+ } while ( (p2m->pod.count == 0) && (i < ARRAY_SIZE(mrp->list)) ); -+} -+ -+static void pod_eager_record(struct p2m_domain *p2m, -+ unsigned long gfn, unsigned int order) -+{ -+ struct pod_mrp_list *mrp = &p2m->pod.mrp; -+ -+ ASSERT(mrp->list[mrp->idx] == INVALID_GFN); -+ ASSERT(gfn != INVALID_GFN); -+ -+ mrp->list[mrp->idx++] = -+ gfn | (order == PAGE_ORDER_2M ? 
POD_LAST_SUPERPAGE : 0); -+ mrp->idx %= ARRAY_SIZE(mrp->list); -+} -+ - int - p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, - unsigned int order, -@@ -1015,6 +1045,8 @@ p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, - return 0; - } - -+ pod_eager_reclaim(p2m); -+ - /* Only sweep if we're actually out of memory. Doing anything else - * causes unnecessary time and fragmentation of superpages in the p2m. */ - if ( p2m->pod.count == 0 ) -@@ -1051,6 +1083,8 @@ p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, - p2m->pod.entry_count -= (1 << order); - BUG_ON(p2m->pod.entry_count < 0); - -+ pod_eager_record(p2m, gfn_aligned, order); -+ - if ( tb_init_done ) - { - struct { -@@ -1066,12 +1100,6 @@ p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, - __trace_var(TRC_MEM_POD_POPULATE, 0, sizeof(t), &t); - } - -- /* Check the last guest demand-populate */ -- if ( p2m->pod.entry_count > p2m->pod.count -- && (order == PAGE_ORDER_2M) -- && (q & P2M_ALLOC) ) -- p2m_pod_check_last_super(p2m, gfn_aligned); -- - pod_unlock(p2m); - return 0; - out_of_memory: -diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c -index c6b883d..cbe3f24 100644 ---- a/xen/arch/x86/mm/p2m.c -+++ b/xen/arch/x86/mm/p2m.c -@@ -60,6 +60,7 @@ boolean_param("hap_2mb", opt_hap_2mb); - /* Init the datastructures for later use by the p2m code */ - static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) - { -+ unsigned int i; - int ret = 0; - - mm_rwlock_init(&p2m->lock); -@@ -75,6 +76,9 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) - - p2m->np2m_base = P2M_BASE_EADDR; - -+ for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) -+ p2m->pod.mrp.list[i] = INVALID_GFN; -+ - if ( hap_enabled(d) && cpu_has_vmx ) - ret = ept_p2m_init(p2m); - else -diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h -index 5e99ac6..e91a875 100644 ---- a/xen/include/asm-x86/p2m.h -+++ b/xen/include/asm-x86/p2m.h -@@ -292,10 +292,20 @@ struct p2m_domain { - entry_count; /* # of pages in p2m marked pod */ - unsigned long reclaim_single; /* Last gpfn of a scan */ - unsigned long max_guest; /* gpfn of max guest demand-populate */ --#define POD_HISTORY_MAX 128 -- /* gpfn of last guest superpage demand-populated */ -- unsigned long last_populated[POD_HISTORY_MAX]; -- unsigned int last_populated_index; -+ -+ /* -+ * Tracking of the most recently populated PoD pages, for eager -+ * reclamation. -+ */ -+ struct pod_mrp_list { -+#define NR_POD_MRP_ENTRIES 32 -+ -+/* Encode ORDER_2M superpage in top bit of GFN */ -+#define POD_LAST_SUPERPAGE (INVALID_GFN & ~(INVALID_GFN >> 1)) -+ -+ unsigned long list[NR_POD_MRP_ENTRIES]; -+ unsigned int idx; -+ } mrp; - mm_lock_t lock; /* Locking of private pod structs, * - * not relying on the p2m lock. */ - } pod; diff -Nru xen-4.6.0/debian/patches/CVE-2015-7971.diff xen-4.6.5/debian/patches/CVE-2015-7971.diff --- xen-4.6.0/debian/patches/CVE-2015-7971.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7971.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -From 2c55f2480b83990081dc43541eebf391635014ca Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Thu, 29 Oct 2015 13:52:02 +0100 -Subject: x86: rate-limit logging in do_xen{oprof,pmu}_op() - -Some of the sub-ops are acessible to all guests, and hence should be -rate-limited. In the xenoprof case, just like for XSA-146, include them -only in debug builds. 
Since the vPMU code is rather new, allow them to -be always present, but downgrade them to (rate limited) guest messages. - -This is CVE-2015-7971 / XSA-152. - -Signed-off-by: Jan Beulich -Reviewed-by: Ian Campbell -master commit: 95e7415843b94c346e5ba8682665f508f220e04b -master date: 2015-10-29 13:37:19 +0100 - -(cherry picked from commit bdc9fdf9d468cb94ca0fbed1b969c20bf173dc9b) - -Patch-Name: CVE-2015-7971.diff ---- - xen/arch/x86/cpu/vpmu.c | 8 ++++---- - xen/common/xenoprof.c | 9 +++------ - 2 files changed, 7 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c -index 8af3df1..2f5156a 100644 ---- a/xen/arch/x86/cpu/vpmu.c -+++ b/xen/arch/x86/cpu/vpmu.c -@@ -682,8 +682,8 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) - vpmu_mode = pmu_params.val; - else if ( vpmu_mode != pmu_params.val ) - { -- printk(XENLOG_WARNING -- "VPMU: Cannot change mode while active VPMUs exist\n"); -+ gprintk(XENLOG_WARNING, -+ "VPMU: Cannot change mode while active VPMUs exist\n"); - ret = -EBUSY; - } - -@@ -714,8 +714,8 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) - vpmu_features = pmu_params.val; - else - { -- printk(XENLOG_WARNING "VPMU: Cannot change features while" -- " active VPMUs exist\n"); -+ gprintk(XENLOG_WARNING, -+ "VPMU: Cannot change features while active VPMUs exist\n"); - ret = -EBUSY; - } - -diff --git a/xen/common/xenoprof.c b/xen/common/xenoprof.c -index 53a803a..19b4605 100644 ---- a/xen/common/xenoprof.c -+++ b/xen/common/xenoprof.c -@@ -676,15 +676,13 @@ ret_t do_xenoprof_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) - - if ( (op < 0) || (op > XENOPROF_last_op) ) - { -- printk("xenoprof: invalid operation %d for domain %d\n", -- op, current->domain->domain_id); -+ gdprintk(XENLOG_DEBUG, "invalid operation %d\n", op); - return -EINVAL; - } - - if ( !NONPRIV_OP(op) && (current->domain != xenoprof_primary_profiler) ) - { -- printk("xenoprof: dom %d denied privileged operation %d\n", -- current->domain->domain_id, op); -+ gdprintk(XENLOG_DEBUG, "denied privileged operation %d\n", op); - return -EPERM; - } - -@@ -907,8 +905,7 @@ ret_t do_xenoprof_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) - spin_unlock(&xenoprof_lock); - - if ( ret < 0 ) -- printk("xenoprof: operation %d failed for dom %d (status : %d)\n", -- op, current->domain->domain_id, ret); -+ gdprintk(XENLOG_DEBUG, "operation %d failed: %d\n", op, ret); - - return ret; - } diff -Nru xen-4.6.0/debian/patches/CVE-2015-7972.diff xen-4.6.5/debian/patches/CVE-2015-7972.diff --- xen-4.6.0/debian/patches/CVE-2015-7972.diff 2015-11-01 20:38:23.000000000 +0000 +++ xen-4.6.5/debian/patches/CVE-2015-7972.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,86 +0,0 @@ -From 2cc6e92b8046952534df6e27abc16740a0ce9b0d Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 21 Oct 2015 16:18:30 +0100 -Subject: libxl: adjust PoD target by memory fudge, too - -PoD guests need to balloon at least as far as required by PoD, or risk -crashing. Currently they don't necessarily know what the right value -is, because our memory accounting is (at the very least) confusing. - -Apply the memory limit fudge factor to the in-hypervisor PoD memory -target, too. This will increase the size of the guest's PoD cache by -the fudge factor LIBXL_MAXMEM_CONSTANT (currently 1Mby). This ensures -that even with a slightly-off balloon driver, the guest will be -stable even under memory pressure. 
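Concretely, libxl accounts memory in KiB while the PoD target is set in 4 KiB pages, which is where the "/ 4" in the hunk below comes from; the fix simply adds the fudge before converting. A one-file model of the changed arithmetic (the example target value is assumed; the constant follows the "currently 1Mby" figure above):

    #include <stdint.h>
    #include <stdio.h>

    /* 1 MiB expressed in KiB -- the "memory fudge" the text above calls
     * LIBXL_MAXMEM_CONSTANT. */
    #define LIBXL_MAXMEM_CONSTANT 1024

    int main(void)
    {
        uint64_t new_target_memkb = 512 * 1024;  /* example target: 512 MiB */

        /* libxl counts KiB; the PoD target is in 4 KiB pages, hence "/ 4".
         * Before the fix, the fudge was missing from this conversion. */
        uint64_t pod_target_pages =
            (new_target_memkb + LIBXL_MAXMEM_CONSTANT) / 4;

        printf("PoD cache target: %llu pages\n",
               (unsigned long long)pod_target_pages);
        return 0;
    }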
- -There are two call sites of xc_domain_set_pod_target that need fixing: - -The one in libxl_set_memory_target is straightforward. - -The one in xc_hvm_build_x86.c:setup_guest is more awkward. Simply -setting the PoD target differently does not work because the various -amounts of memory during domain construction no longer match up. -Instead, we adjust the guest memory target in xenstore (but only for -PoD guests). - -This introduces a 1Mby discrepancy between the balloon target of a PoD -guest at boot, and the target set by an apparently-equivalent `xl -mem-set' (or similar) later. This approach is low-risk for a security -fix but we need to fix this up properly in xen.git#staging and -probably also in stable trees. - -This is XSA-153. - -Signed-off-by: Ian Jackson -(cherry picked from commit 56fb5fd62320eb40a7517206f9706aa9188d6f7b) - -Patch-Name: CVE-2015-7972.diff ---- - tools/libxl/libxl.c | 2 +- - tools/libxl/libxl_dom.c | 9 ++++++++- - 2 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index d38d0c7..1366177 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -4815,7 +4815,7 @@ retry_transaction: - } - - rc = xc_domain_set_pod_target(ctx->xch, domid, -- new_target_memkb / 4, NULL, NULL, NULL); -+ (new_target_memkb + LIBXL_MAXMEM_CONSTANT) / 4, NULL, NULL, NULL); - if (rc != 0) { - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, - "xc_domain_set_pod_target domid=%d, memkb=%d " -diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c -index b514377..8019f4e 100644 ---- a/tools/libxl/libxl_dom.c -+++ b/tools/libxl/libxl_dom.c -@@ -486,6 +486,7 @@ int libxl__build_post(libxl__gc *gc, uint32_t domid, - xs_transaction_t t; - char **ents; - int i, rc; -+ int64_t mem_target_fudge; - - if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) { - rc = set_vnuma_affinity(gc, domid, info); -@@ -518,11 +519,17 @@ int libxl__build_post(libxl__gc *gc, uint32_t domid, - } - } - -+ mem_target_fudge = -+ (info->type == LIBXL_DOMAIN_TYPE_HVM && -+ info->max_memkb > info->target_memkb) -+ ? 
LIBXL_MAXMEM_CONSTANT : 0; -+ - ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *)); - ents[0] = "memory/static-max"; - ents[1] = GCSPRINTF("%"PRId64, info->max_memkb); - ents[2] = "memory/target"; -- ents[3] = GCSPRINTF("%"PRId64, info->target_memkb - info->video_memkb); -+ ents[3] = GCSPRINTF("%"PRId64, info->target_memkb - info->video_memkb -+ - mem_target_fudge); - ents[4] = "memory/videoram"; - ents[5] = GCSPRINTF("%"PRId64, info->video_memkb); - ents[6] = "domid"; diff -Nru xen-4.6.0/debian/patches/series xen-4.6.5/debian/patches/series --- xen-4.6.0/debian/patches/series 2017-01-10 13:54:04.000000000 +0000 +++ xen-4.6.5/debian/patches/series 2017-05-09 13:44:03.000000000 +0000 @@ -23,90 +23,42 @@ tools-include-install.diff tools-xenmon-install.diff tools-xenstore-compatibility.diff -CVE-2015-7812.diff -CVE-2015-7813.diff -CVE-2015-7814.diff -CVE-2015-7835.diff -CVE-2015-7969.diff -CVE-2015-7970.diff -CVE-2015-7969.1.diff -CVE-2015-7971.diff -CVE-2015-7972.diff # Additional fixes ubuntu-config-prefix-fix.patch +ubuntu-x86emul-fix-compile.patch # Additional security fixes -xsa156.patch -xsa155-xen-0001-xen-Add-RING_COPY_REQUEST.patch -xsa155-xen-0002-blktap2-Use-RING_COPY_REQUEST.patch -xsa155-xen-0003-libvchan-Read-prod-cons-only-once.patch -xsa158.patch -xsa158-fix.patch -xsa159.patch -xsa160-4.6.patch -xsa165-4.6.patch -xsa166.patch -xsa167-4.6.patch -xsa168.patch -xsa169.patch -xsa170.patch -xsa154-4.6.patch -xsa172.patch -xsa173-4.6.patch -xsa175-4.6-0001-libxl-Record-backend-frontend-paths-in-libxl-DOMID.patch -xsa175-4.6-0002-libxl-Provide-libxl__backendpath_parse_domid.patch -xsa175-4.6-0003-libxl-Do-not-trust-frontend-in-libxl__devices_destro.patch -xsa175-4.6-0004-libxl-Do-not-trust-frontend-in-libxl__device_nextid.patch -xsa175-4.6-0005-libxl-Do-not-trust-frontend-for-disk-eject-event.patch -xsa175-4.6-0006-libxl-Do-not-trust-frontend-for-disk-in-getinfo.patch -xsa175-4.6-0007-libxl-Do-not-trust-frontend-for-vtpm-list.patch -xsa175-4.6-0008-libxl-Do-not-trust-frontend-for-vtpm-in-getinfo.patch -xsa175-4.6-0009-libxl-Do-not-trust-frontend-for-nic-in-libxl_devid_t.patch -xsa175-4.6-0010-libxl-Do-not-trust-frontend-for-nic-in-getinfo.patch -xsa175-4.6-0011-libxl-Do-not-trust-frontend-for-channel-in-list.patch -xsa175-4.6-0012-libxl-Do-not-trust-frontend-for-channel-in-getinfo.patch -xsa175-4.6-0013-libxl-Cleanup-Have-libxl__alloc_vdev-use-libxl.patch -xsa175-4.6-0014-libxl-Document-serial-correctly.patch -xsa176.patch -xsa178-unstable-0001-libxl-Make-copy-of-every-xs-backend-in-libxl-in-_gen.patch -xsa178-unstable-0002-libxl-Do-not-trust-backend-in-libxl__device_exists.patch -xsa178-unstable-0003-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-excep.patch -xsa178-unstable-0004-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-uuid.patch -xsa178-unstable-0005-libxl-cdrom-eject-and-insert-write-to-libxl.patch -xsa178-unstable-0006-libxl-Do-not-trust-backend-for-disk-eject-vdev.patch -xsa178-unstable-0007-libxl-Do-not-trust-backend-for-disk-fix-driver-domai.patch -xsa178-unstable-0008-libxl-Do-not-trust-backend-for-disk-in-getinfo.patch -xsa178-unstable-0009-libxl-Do-not-trust-backend-for-cdrom-insert.patch -xsa178-unstable-0010-libxl-Do-not-trust-backend-for-channel-in-getinfo.patch -xsa178-unstable-0011-libxl-Rename-libxl__device_-nic-channel-_from_xs_be-.patch -xsa178-unstable-0012-libxl-Rename-READ_BACKEND-to-READ_LIBXLDEV.patch -xsa178-unstable-0013-libxl-Have-READ_LIBXLDEV-use-libxl_path-rather-than-.patch 
-xsa178-unstable-0014-libxl-Do-not-trust-backend-in-nic-getinfo.patch -xsa178-unstable-0015-libxl-Do-not-trust-backend-for-nic-in-devid_to_devic.patch -xsa178-unstable-0016-libxl-Do-not-trust-backend-for-nic-in-list.patch -xsa178-unstable-0017-libxl-Do-not-trust-backend-in-channel-list.patch -xsa178-unstable-0018-libxl-Cleanup-use-libxl__backendpath_parse_domid-in-.patch -xsa178-unstable-0019-libxl-Fix-NULL-pointer-due-to-XSA-178-fix-wrong-XS-n.patch -xsa181.patch -xsa182-4.6.patch -xsa183-4.6.patch -xsa185.patch -xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch -xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch -xsa190-4.6.patch -xsa191-4.6.patch -xsa192.patch -xsa193-4.7.patch -xsa195.patch -xsa196-0001-x86-emul-Correct-the-IDT-entry-calculation-in-inject.patch -xsa196-0002-x86-svm-Fix-injection-of-software-interrupts.patch -xsa198.patch -xsa200-4.6.patch -xsa201-1.patch -xsa201-2.patch -xsa201-3-4.7.patch -xsa201-4.patch -xsa202-4.6.patch -xsa203-4.7.patch -xsa204-4.7.patch +# XSA-206 +xsa206-4.6-0001-xenstored-apply-a-write-transaction-rate-limit.patch +xsa206-4.6-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch +xsa206-4.6-0003-oxenstored-refactor-putting-response-on-wire.patch +xsa206-4.6-0004-oxenstored-remove-some-unused-parameters.patch +xsa206-4.6-0005-oxenstored-refactor-request-processing.patch +xsa206-4.6-0006-oxenstored-keep-track-of-each-transaction-s-operatio.patch +xsa206-4.6-0007-oxenstored-move-functions-that-process-simple-operat.patch +xsa206-4.6-0008-oxenstored-replay-transaction-upon-conflict.patch +xsa206-4.6-0009-oxenstored-log-request-and-response-during-transacti.patch +xsa206-4.6-0010-oxenstored-allow-compilation-prior-to-OCaml-3.12.0.patch +xsa206-4.6-0011-oxenstored-comments-explaining-some-variables.patch +xsa206-4.6-0012-oxenstored-handling-of-domain-conflict-credit.patch +xsa206-4.6-0013-oxenstored-ignore-domains-with-no-conflict-credit.patch +xsa206-4.6-0014-oxenstored-add-transaction-info-relevant-to-history-.patch +xsa206-4.6-0015-oxenstored-support-commit-history-tracking.patch +xsa206-4.6-0016-oxenstored-only-record-operations-with-side-effects-.patch +xsa206-4.6-0017-oxenstored-discard-old-commit-history-on-txn-end.patch +xsa206-4.6-0018-oxenstored-track-commit-history.patch +xsa206-4.6-0019-oxenstored-blame-the-connection-that-caused-a-transa.patch +xsa206-4.6-0020-oxenstored-allow-self-conflicts.patch +xsa206-4.6-0021-oxenstored-do-not-commit-read-only-transactions.patch +xsa206-4.6-0022-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch +xsa206-4.6-0023-oxenstored-transaction-conflicts-improve-logging.patch +xsa206-4.6-0024-oxenstored-trim-history-in-the-frequent_ops-function.patch +# XSA-212 +xsa212.patch +# XSA-213 +xsa213-4.6.patch +# XSA-214 +xsa214.patch +# XSA-215 +xsa215.patch diff -Nru xen-4.6.0/debian/patches/tools-xenstat-abiname.diff xen-4.6.5/debian/patches/tools-xenstat-abiname.diff --- xen-4.6.0/debian/patches/tools-xenstat-abiname.diff 2015-11-01 20:38:20.000000000 +0000 +++ xen-4.6.5/debian/patches/tools-xenstat-abiname.diff 2017-03-10 14:28:49.000000000 +0000 @@ -8,10 +8,10 @@ tools/xenstat/libxenstat/Makefile | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) -diff --git a/tools/xenstat/libxenstat/Makefile b/tools/xenstat/libxenstat/Makefile -index 850d24a..7c13c06 100644 ---- a/tools/xenstat/libxenstat/Makefile -+++ b/tools/xenstat/libxenstat/Makefile +Index: xen-4.6.5/tools/xenstat/libxenstat/Makefile 
+=================================================================== +--- xen-4.6.5.orig/tools/xenstat/libxenstat/Makefile ++++ xen-4.6.5/tools/xenstat/libxenstat/Makefile @@ -18,18 +18,14 @@ include $(XEN_ROOT)/tools/Rules.mk LDCONFIG=ldconfig MAKE_LINK=ln -sf @@ -34,16 +34,16 @@ CFLAGS+=-fPIC CFLAGS+=-Isrc $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore) $(CFLAGS_xeninclude) -include $(XEN_ROOT)/tools/config.h -@@ -38,7 +34,7 @@ LDLIBS-y = $(LDLIBS_libxenstore) $(LDLIBS_libxenctrl) +@@ -38,7 +34,7 @@ LDLIBS-y = $(LDLIBS_libxenstore) $(LDLIB LDLIBS-$(CONFIG_SunOS) += -lkstat .PHONY: all -all: $(LIB) $(SHLIB) $(SHLIB_LINKS) +all: $(LIB) $(SHLIB) - $(LIB): $(OBJECTS-y) - $(AR) rc $@ $^ -@@ -48,19 +44,11 @@ $(SHLIB): $(OBJECTS-y) + $(OBJECTS-y): src/_paths.h + +@@ -50,19 +46,11 @@ $(SHLIB): $(OBJECTS-y) $(CC) $(LDFLAGS) $(SONAME_FLAGS) $(SHLIB_LDFLAGS) -o $@ \ $(OBJECTS-y) $(LDLIBS-y) $(APPEND_LDFLAGS) diff -Nru xen-4.6.0/debian/patches/ubuntu-x86emul-fix-compile.patch xen-4.6.5/debian/patches/ubuntu-x86emul-fix-compile.patch --- xen-4.6.0/debian/patches/ubuntu-x86emul-fix-compile.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/ubuntu-x86emul-fix-compile.patch 2017-03-10 17:26:27.000000000 +0000 @@ -0,0 +1,20 @@ +Description: Fix FTBS due to missing macro + MASK_EXTR is defined in xen/include/lib.h but that path seems not to be + added in the gcc call, so just copy the definition directly into + x86_emulate.c. +Author: Stefan Bader +Forwarded: yes + +Index: xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.c +=================================================================== +--- xen-4.6.5.orig/xen/arch/x86/x86_emulate/x86_emulate.c ++++ xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -20,6 +20,8 @@ + * along with this program; If not, see . + */ + ++#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m))) ++ + /* Operand sizes: 8-bit operands or specified/overridden size. */ + #define ByteOp (1<<0) /* 8-bit operands. */ + /* Destination operand type. */ diff -Nru xen-4.6.0/debian/patches/xsa154-4.6.patch xen-4.6.5/debian/patches/xsa154-4.6.patch --- xen-4.6.0/debian/patches/xsa154-4.6.patch 2016-02-17 16:12:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa154-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,359 +0,0 @@ -x86: enforce consistent cachability of MMIO mappings - -We've been told by Intel that inconsistent cachability between -multiple mappings of the same page can affect system stability only -when the affected page is an MMIO one. Since the stale data issue is -of no relevance to the hypervisor (since all guest memory accesses go -through proper accessors and validation), handling of RAM pages -remains unchanged here. Any MMIO mapped by domains however needs to be -done consistently (all cachable mappings or all uncachable ones), in -order to avoid Machine Check exceptions. Since converting existing -cachable mappings to uncachable (at the time an uncachable mapping -gets established) would in the PV case require tracking all mappings, -allow MMIO to only get mapped uncachable (UC, UC-, or WC). - -This also implies that in the PV case we mustn't use the L1 PTE update -fast path when cachability flags get altered. - -Since in the HVM case at least for now we want to continue honoring -pinned cachability attributes for pages not mapped by the hypervisor, -special case handling of r/o MMIO pages (forcing UC) gets added there. 
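The cachability being policed here is encoded in three x86 PTE flag bits, and the "flip" logic added to mm.c below works by XORing exactly the bits that must change. A self-contained model of that bit selection (bit positions are architectural; the helper name is invented, and the memory-type names follow the comments in the hunk below):

    #include <stdint.h>

    /* x86 4K-page PTE cachability bits. */
    #define _PAGE_PWT 0x008u
    #define _PAGE_PCD 0x010u
    #define _PAGE_PAT 0x080u
    #define PAGE_CACHE_ATTRS (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)

    /* Return the attribute bits to XOR into an MMIO L1 PTE so that a
     * cachable request becomes uncachable. Modeled on, not copied from,
     * the switch the patch adds to get_page_from_l1e(). */
    static uint32_t mmio_cache_flip(uint32_t l1f)
    {
        switch (l1f & PAGE_CACHE_ATTRS) {
        case 0:                          /* WB -> set PWT and PCD */
            return _PAGE_PWT | _PAGE_PCD;
        case _PAGE_PWT:                  /* WT -> set PCD */
        case _PAGE_PWT | _PAGE_PAT:      /* WP -> set PCD, clear PAT */
            return _PAGE_PCD | (l1f & _PAGE_PAT);
        default:                         /* UC, UC- or WC: left alone */
            return 0;
        }
    }

Using XOR (l1e_flip_flags() in the patch) rather than separate set and clear steps lets one returned mask express both setting PCD and clearing PAT in a single operation.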
-Arguably the counterpart change to p2m-pt.c may not be necessary, since -UC- (which already gets enforced there) is probably strict enough. - -Note that the shadow code changes include fixing the write protection -of r/o MMIO ranges: shadow_l1e_remove_flags() and its siblings, other -than l1e_remove_flags() and alike, return the new PTE (and hence -ignoring their return values makes them no-ops). - -This is CVE-2016-2270 / XSA-154. - -Signed-off-by: Jan Beulich -Acked-by: Andrew Cooper - ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1080,6 +1080,15 @@ limit is ignored by Xen. - - Specify if the MMConfig space should be enabled. - -+### mmio-relax -+> `= | all` -+ -+> Default: `false` -+ -+By default, domains may not create cached mappings to MMIO regions. -+This option relaxes the check for Domain 0 (or when using `all`, all PV -+domains), to permit the use of cacheable MMIO mappings. -+ - ### msi - > `= ` - ---- a/xen/arch/x86/hvm/mtrr.c -+++ b/xen/arch/x86/hvm/mtrr.c -@@ -807,8 +807,17 @@ int epte_get_entry_emt(struct domain *d, - if ( v->domain != d ) - v = d->vcpu ? d->vcpu[0] : NULL; - -- if ( !mfn_valid(mfn_x(mfn)) ) -+ if ( !mfn_valid(mfn_x(mfn)) || -+ rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn), -+ mfn_x(mfn) + (1UL << order) - 1) ) -+ { -+ *ipat = 1; - return MTRR_TYPE_UNCACHABLE; -+ } -+ -+ if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), -+ mfn_x(mfn) + (1UL << order) - 1) ) -+ return -1; - - switch ( hvm_get_mem_pinned_cacheattr(d, gfn, order, &type) ) - { ---- a/xen/arch/x86/mm/p2m-pt.c -+++ b/xen/arch/x86/mm/p2m-pt.c -@@ -107,6 +107,8 @@ static unsigned long p2m_type_to_flags(p - case p2m_mmio_direct: - if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) - flags |= _PAGE_RW; -+ else -+ flags |= _PAGE_PWT; - return flags | P2M_BASE_FLAGS | _PAGE_PCD; - } - } ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -519,6 +519,7 @@ _sh_propagate(struct vcpu *v, - gfn_t target_gfn = guest_l1e_get_gfn(guest_entry); - u32 pass_thru_flags; - u32 gflags, sflags; -+ bool_t mmio_mfn; - - /* We don't shadow PAE l3s */ - ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); -@@ -559,7 +560,10 @@ _sh_propagate(struct vcpu *v, - // mfn means that we can not usefully shadow anything, and so we - // return early. 
- // -- if ( !mfn_valid(target_mfn) -+ mmio_mfn = !mfn_valid(target_mfn) -+ || (level == 1 -+ && page_get_owner(mfn_to_page(target_mfn)) == dom_io); -+ if ( mmio_mfn - && !(level == 1 && (!shadow_mode_refcounts(d) - || p2mt == p2m_mmio_direct)) ) - { -@@ -577,7 +581,7 @@ _sh_propagate(struct vcpu *v, - _PAGE_RW | _PAGE_PRESENT); - if ( guest_supports_nx(v) ) - pass_thru_flags |= _PAGE_NX_BIT; -- if ( !shadow_mode_refcounts(d) && !mfn_valid(target_mfn) ) -+ if ( level == 1 && !shadow_mode_refcounts(d) && mmio_mfn ) - pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT; - sflags = gflags & pass_thru_flags; - -@@ -676,10 +680,14 @@ _sh_propagate(struct vcpu *v, - } - - /* Read-only memory */ -- if ( p2m_is_readonly(p2mt) || -- (p2mt == p2m_mmio_direct && -- rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn))) ) -+ if ( p2m_is_readonly(p2mt) ) - sflags &= ~_PAGE_RW; -+ else if ( p2mt == p2m_mmio_direct && -+ rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn)) ) -+ { -+ sflags &= ~(_PAGE_RW | _PAGE_PAT); -+ sflags |= _PAGE_PCD | _PAGE_PWT; -+ } - - // protect guest page tables - // -@@ -1185,22 +1193,28 @@ static int shadow_set_l1e(struct domain - && !sh_l1e_is_magic(new_sl1e) ) - { - /* About to install a new reference */ -- if ( shadow_mode_refcounts(d) ) { -+ if ( shadow_mode_refcounts(d) ) -+ { -+#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -+ int rc; -+ - TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF); -- switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) ) -+ switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) ) - { - default: - /* Doesn't look like a pagetable. */ - flags |= SHADOW_SET_ERROR; - new_sl1e = shadow_l1e_empty(); - break; -- case 1: -- shadow_l1e_remove_flags(new_sl1e, _PAGE_RW); -+ case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE: -+ ASSERT(!(rc & ~PAGE_FLIPPABLE)); -+ new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc); - /* fall through */ - case 0: - shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); - break; - } -+#undef PAGE_FLIPPABLE - } - } - ---- a/xen/arch/x86/mm/shadow/types.h -+++ b/xen/arch/x86/mm/shadow/types.h -@@ -99,6 +99,9 @@ static inline u32 shadow_l4e_get_flags(s - static inline shadow_l1e_t - shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags) - { l1e_remove_flags(sl1e, flags); return sl1e; } -+static inline shadow_l1e_t -+shadow_l1e_flip_flags(shadow_l1e_t sl1e, u32 flags) -+{ l1e_flip_flags(sl1e, flags); return sl1e; } - - static inline shadow_l1e_t shadow_l1e_empty(void) - { return l1e_empty(); } ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -178,6 +178,18 @@ static uint32_t base_disallow_mask; - is_pv_domain(d)) ? \ - L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS)) - -+static s8 __read_mostly opt_mmio_relax; -+static void __init parse_mmio_relax(const char *s) -+{ -+ if ( !*s ) -+ opt_mmio_relax = 1; -+ else -+ opt_mmio_relax = parse_bool(s); -+ if ( opt_mmio_relax < 0 && strcmp(s, "all") ) -+ opt_mmio_relax = 0; -+} -+custom_param("mmio-relax", parse_mmio_relax); -+ - static void __init init_frametable_chunk(void *start, void *end) - { - unsigned long s = (unsigned long)start; -@@ -799,10 +811,7 @@ get_page_from_l1e( - if ( !mfn_valid(mfn) || - (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) - { --#ifndef NDEBUG -- const unsigned long *ro_map; -- unsigned int seg, bdf; --#endif -+ int flip = 0; - - /* Only needed the reference to confirm dom_io ownership. 
*/ - if ( mfn_valid(mfn) ) -@@ -836,24 +845,55 @@ get_page_from_l1e( - return -EINVAL; - } - -- if ( !(l1f & _PAGE_RW) || -- !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) -- return 0; -+ if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) -+ { -+ /* MMIO pages must not be mapped cachable unless requested so. */ -+ switch ( opt_mmio_relax ) -+ { -+ case 0: -+ break; -+ case 1: -+ if ( is_hardware_domain(l1e_owner) ) -+ case -1: -+ return 0; -+ default: -+ ASSERT_UNREACHABLE(); -+ } -+ } -+ else if ( l1f & _PAGE_RW ) -+ { - #ifndef NDEBUG -- if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || -- ((ro_map = pci_get_ro_map(seg)) != NULL && -- test_bit(bdf, ro_map)) ) -- printk(XENLOG_G_WARNING -- "d%d: Forcing read-only access to MFN %lx\n", -- l1e_owner->domain_id, mfn); -- else -- rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, -- print_mmio_emul_range, -- &(struct mmio_emul_range_ctxt){ -- .d = l1e_owner, -- .mfn = mfn }); -+ const unsigned long *ro_map; -+ unsigned int seg, bdf; -+ -+ if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || -+ ((ro_map = pci_get_ro_map(seg)) != NULL && -+ test_bit(bdf, ro_map)) ) -+ printk(XENLOG_G_WARNING -+ "d%d: Forcing read-only access to MFN %lx\n", -+ l1e_owner->domain_id, mfn); -+ else -+ rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, -+ print_mmio_emul_range, -+ &(struct mmio_emul_range_ctxt){ -+ .d = l1e_owner, -+ .mfn = mfn }); - #endif -- return 1; -+ flip = _PAGE_RW; -+ } -+ -+ switch ( l1f & PAGE_CACHE_ATTRS ) -+ { -+ case 0: /* WB */ -+ flip |= _PAGE_PWT | _PAGE_PCD; -+ break; -+ case _PAGE_PWT: /* WT */ -+ case _PAGE_PWT | _PAGE_PAT: /* WP */ -+ flip |= _PAGE_PCD | (l1f & _PAGE_PAT); -+ break; -+ } -+ -+ return flip; - } - - if ( unlikely( (real_pg_owner != pg_owner) && -@@ -1243,8 +1283,9 @@ static int alloc_l1_table(struct page_in - goto fail; - case 0: - break; -- case 1: -- l1e_remove_flags(pl1e[i], _PAGE_RW); -+ case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: -+ ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); -+ l1e_flip_flags(pl1e[i], ret); - break; - } - -@@ -1759,8 +1800,9 @@ static int mod_l1_entry(l1_pgentry_t *pl - return -EINVAL; - } - -- /* Fast path for identical mapping, r/w and presence. */ -- if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) -+ /* Fast path for identical mapping, r/w, presence, and cachability. */ -+ if ( !l1e_has_changed(ol1e, nl1e, -+ PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) ) - { - adjust_guest_l1e(nl1e, pt_dom); - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, -@@ -1783,8 +1825,9 @@ static int mod_l1_entry(l1_pgentry_t *pl - return rc; - case 0: - break; -- case 1: -- l1e_remove_flags(nl1e, _PAGE_RW); -+ case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: -+ ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); -+ l1e_flip_flags(nl1e, rc); - rc = 0; - break; - } -@@ -5000,6 +5043,7 @@ static int ptwr_emulated_update( - l1_pgentry_t pte, ol1e, nl1e, *pl1e; - struct vcpu *v = current; - struct domain *d = v->domain; -+ int ret; - - /* Only allow naturally-aligned stores within the original %cr2 page. */ - if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) ) -@@ -5047,7 +5091,7 @@ static int ptwr_emulated_update( - - /* Check the new PTE. 
*/ - nl1e = l1e_from_intpte(val); -- switch ( get_page_from_l1e(nl1e, d, d) ) -+ switch ( ret = get_page_from_l1e(nl1e, d, d) ) - { - default: - if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && -@@ -5071,8 +5115,9 @@ static int ptwr_emulated_update( - break; - case 0: - break; -- case 1: -- l1e_remove_flags(nl1e, _PAGE_RW); -+ case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: -+ ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); -+ l1e_flip_flags(nl1e, ret); - break; - } - ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -157,6 +157,9 @@ static inline l4_pgentry_t l4e_from_padd - #define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags)) - #define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags)) - -+/* Flip flags in an existing L1 PTE. */ -+#define l1e_flip_flags(x, flags) ((x).l1 ^= put_pte_flags(flags)) -+ - /* Check if a pte's page mapping or significant access flags have changed. */ - #define l1e_has_changed(x,y,flags) \ - ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) diff -Nru xen-4.6.0/debian/patches/xsa155-xen-0001-xen-Add-RING_COPY_REQUEST.patch xen-4.6.5/debian/patches/xsa155-xen-0001-xen-Add-RING_COPY_REQUEST.patch --- xen-4.6.0/debian/patches/xsa155-xen-0001-xen-Add-RING_COPY_REQUEST.patch 2015-12-16 11:10:28.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa155-xen-0001-xen-Add-RING_COPY_REQUEST.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -From 8937a9c6cdfd39b4e4a3ef5b3f9b5b21e614779d Mon Sep 17 00:00:00 2001 -From: David Vrabel -Date: Fri, 20 Nov 2015 11:59:05 -0500 -Subject: [PATCH 1/3] xen: Add RING_COPY_REQUEST() - -Using RING_GET_REQUEST() on a shared ring is easy to use incorrectly -(i.e., by not considering that the other end may alter the data in the -shared ring while it is being inspected). Safe usage of a request -generally requires taking a local copy. - -Provide a RING_COPY_REQUEST() macro to use instead of -RING_GET_REQUEST() and an open-coded memcpy(). This takes care of -ensuring that the copy is done correctly regardless of any possible -compiler optimizations. - -Use a volatile source to prevent the compiler from reordering or -omitting the copy. - -This is part of XSA155. - -Signed-off-by: David Vrabel -Signed-off-by: Konrad Rzeszutek Wilk ---- - xen/include/public/io/ring.h | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/xen/include/public/io/ring.h b/xen/include/public/io/ring.h -index ba9401b..f40cf35 100644 ---- a/xen/include/public/io/ring.h -+++ b/xen/include/public/io/ring.h -@@ -212,6 +212,17 @@ typedef struct __name##_back_ring __name##_back_ring_t - #define RING_GET_REQUEST(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) - -+/* -+ * Get a local copy of a request. -+ * -+ * Use this in preference to RING_GET_REQUEST() so all processing is -+ * done on a local copy that cannot be modified by the other end. -+ */ -+#define RING_COPY_REQUEST(_r, _idx, _req) do { \ -+ /* Use volatile to force the copy into _req. 
*/ \ -+ *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ -+} while (0) -+ - #define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) - --- -2.1.0 - diff -Nru xen-4.6.0/debian/patches/xsa155-xen-0002-blktap2-Use-RING_COPY_REQUEST.patch xen-4.6.5/debian/patches/xsa155-xen-0002-blktap2-Use-RING_COPY_REQUEST.patch --- xen-4.6.0/debian/patches/xsa155-xen-0002-blktap2-Use-RING_COPY_REQUEST.patch 2015-12-16 11:10:36.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa155-xen-0002-blktap2-Use-RING_COPY_REQUEST.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -From fdd6f8cd7553e7ae8922637cb3b93eb7fcade207 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk -Date: Fri, 20 Nov 2015 12:16:02 -0500 -Subject: [PATCH 2/3] blktap2: Use RING_COPY_REQUEST - -Instead of RING_GET_REQUEST. Using a local copy of the -ring (and also with proper memory barriers) will mean -we can do not have to worry about the compiler optimizing -the code and doing a double-fetch in the shared memory space. - -This is part of XSA155. - -Signed-off-by: Konrad Rzeszutek Wilk ---- - tools/blktap2/drivers/block-log.c | 3 ++- - tools/blktap2/drivers/tapdisk-vbd.c | 4 ++-- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/blktap2/drivers/block-log.c b/tools/blktap2/drivers/block-log.c -index 5330cdc..5f3bd35 100644 ---- a/tools/blktap2/drivers/block-log.c -+++ b/tools/blktap2/drivers/block-log.c -@@ -494,11 +494,12 @@ static int ctl_kick(struct tdlog_state* s, int fd) - reqstart = s->bring.req_cons; - reqend = s->sring->req_prod; - -+ xen_mb(); - BDPRINTF("ctl: ring kicked (start = %u, end = %u)", reqstart, reqend); - - while (reqstart != reqend) { - /* XXX actually submit these! */ -- memcpy(&req, RING_GET_REQUEST(&s->bring, reqstart), sizeof(req)); -+ RING_COPY_REQUEST(&s->bring, reqstart, &req); - BDPRINTF("ctl: read request %"PRIu64":%u", req.sector, req.count); - s->bring.req_cons = ++reqstart; - -diff --git a/tools/blktap2/drivers/tapdisk-vbd.c b/tools/blktap2/drivers/tapdisk-vbd.c -index 6d1d94a..77f2854 100644 ---- a/tools/blktap2/drivers/tapdisk-vbd.c -+++ b/tools/blktap2/drivers/tapdisk-vbd.c -@@ -1555,7 +1555,7 @@ tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd) - int idx; - RING_IDX rp, rc; - td_ring_t *ring; -- blkif_request_t *req; -+ blkif_request_t req; - td_vbd_request_t *vreq; - - ring = &vbd->ring; -@@ -1566,7 +1566,7 @@ tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd) - xen_rmb(); - - for (rc = ring->fe_ring.req_cons; rc != rp; rc++) { -- req = RING_GET_REQUEST(&ring->fe_ring, rc); -+ RING_COPY_REQUEST(&ring->fe_ring, rc, &req); - ++ring->fe_ring.req_cons; - - idx = req->id; --- -2.1.0 - diff -Nru xen-4.6.0/debian/patches/xsa155-xen-0003-libvchan-Read-prod-cons-only-once.patch xen-4.6.5/debian/patches/xsa155-xen-0003-libvchan-Read-prod-cons-only-once.patch --- xen-4.6.0/debian/patches/xsa155-xen-0003-libvchan-Read-prod-cons-only-once.patch 2015-12-16 11:10:43.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa155-xen-0003-libvchan-Read-prod-cons-only-once.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -From c1fce65e2b720684ea6ba76ae59921542bd154bb Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk -Date: Fri, 20 Nov 2015 12:22:14 -0500 -Subject: [PATCH 3/3] libvchan: Read prod/cons only once. - -We must ensure that the prod/cons are only read once and that -the compiler won't try to optimize the reads. That is split -the read of these in multiple instructions influencing later -branch code. 
As such insert barriers when fetching the cons -and prod index. - -This is part of XSA155. - -Signed-off-by: Konrad Rzeszutek Wilk ---- - tools/libvchan/io.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/libvchan/io.c b/tools/libvchan/io.c -index 8a9629b..381cc05 100644 ---- a/tools/libvchan/io.c -+++ b/tools/libvchan/io.c -@@ -117,6 +117,7 @@ static inline int send_notify(struct libxenvchan *ctrl, uint8_t bit) - static inline int raw_get_data_ready(struct libxenvchan *ctrl) - { - uint32_t ready = rd_prod(ctrl) - rd_cons(ctrl); -+ xen_mb(); /* Ensure 'ready' is read only once. */ - if (ready > rd_ring_size(ctrl)) - /* We have no way to return errors. Locking up the ring is - * better than the alternatives. */ -@@ -158,6 +159,7 @@ int libxenvchan_data_ready(struct libxenvchan *ctrl) - static inline int raw_get_buffer_space(struct libxenvchan *ctrl) - { - uint32_t ready = wr_ring_size(ctrl) - (wr_prod(ctrl) - wr_cons(ctrl)); -+ xen_mb(); /* Ensure 'ready' is read only once. */ - if (ready > wr_ring_size(ctrl)) - /* We have no way to return errors. Locking up the ring is - * better than the alternatives. */ --- -2.1.0 - diff -Nru xen-4.6.0/debian/patches/xsa156.patch xen-4.6.5/debian/patches/xsa156.patch --- xen-4.6.0/debian/patches/xsa156.patch 2015-11-25 15:04:36.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa156.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -x86/HVM: always intercept #AC and #DB - -Both being benign exceptions, and both being possible to get triggered -by exception delivery, this is required to prevent a guest from locking -up a CPU (resulting from no other VM exits occurring once getting into -such a loop). - -The specific scenarios: - -1) #AC may be raised during exception delivery if the handler is set to -be a ring-3 one by a 32-bit guest, and the stack is misaligned. - -2) #DB may be raised during exception delivery when a breakpoint got -placed on a data structure involved in delivering the exception. This -can result in an endless loop when a 64-bit guest uses a non-zero IST -for the vector 1 IDT entry, but even without use of IST the time it -takes until a contributory fault would get raised (results depending -on the handler) may be quite long. - -This is XSA-156. - -Reported-by: Benjamin Serebrin -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Tested-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1043,10 +1043,11 @@ static void noreturn svm_do_resume(struc - unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) ) - { - uint32_t intercepts = vmcb_get_exception_intercepts(vmcb); -- uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3); -+ - v->arch.hvm_vcpu.debug_state_latch = debug_state; - vmcb_set_exception_intercepts( -- vmcb, debug_state ? (intercepts | mask) : (intercepts & ~mask)); -+ vmcb, debug_state ? 
(intercepts | (1U << TRAP_int3)) -+ : (intercepts & ~(1U << TRAP_int3))); - } - - if ( v->arch.hvm_svm.launch_core != smp_processor_id() ) -@@ -2434,8 +2435,9 @@ void svm_vmexit_handler(struct cpu_user_ - - case VMEXIT_EXCEPTION_DB: - if ( !v->domain->debugger_attached ) -- goto unexpected_exit_type; -- domain_pause_for_debugger(); -+ hvm_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE); -+ else -+ domain_pause_for_debugger(); - break; - - case VMEXIT_EXCEPTION_BP: -@@ -2483,6 +2485,11 @@ void svm_vmexit_handler(struct cpu_user_ - break; - } - -+ case VMEXIT_EXCEPTION_AC: -+ HVMTRACE_1D(TRAP, TRAP_alignment_check); -+ hvm_inject_hw_exception(TRAP_alignment_check, vmcb->exitinfo1); -+ break; -+ - case VMEXIT_EXCEPTION_UD: - svm_vmexit_ud_intercept(regs); - break; ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1224,16 +1224,10 @@ static void vmx_update_host_cr3(struct v - - void vmx_update_debug_state(struct vcpu *v) - { -- unsigned long mask; -- -- mask = 1u << TRAP_int3; -- if ( !cpu_has_monitor_trap_flag ) -- mask |= 1u << TRAP_debug; -- - if ( v->arch.hvm_vcpu.debug_state_latch ) -- v->arch.hvm_vmx.exception_bitmap |= mask; -+ v->arch.hvm_vmx.exception_bitmap |= 1U << TRAP_int3; - else -- v->arch.hvm_vmx.exception_bitmap &= ~mask; -+ v->arch.hvm_vmx.exception_bitmap &= ~(1U << TRAP_int3); - - vmx_vmcs_enter(v); - vmx_update_exception_bitmap(v); -@@ -3060,9 +3054,10 @@ void vmx_vmexit_handler(struct cpu_user_ - __vmread(EXIT_QUALIFICATION, &exit_qualification); - HVMTRACE_1D(TRAP_DEBUG, exit_qualification); - write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE); -- if ( !v->domain->debugger_attached || cpu_has_monitor_trap_flag ) -- goto exit_and_crash; -- domain_pause_for_debugger(); -+ if ( !v->domain->debugger_attached ) -+ hvm_inject_hw_exception(vector, HVM_DELIVER_NO_ERROR_CODE); -+ else -+ domain_pause_for_debugger(); - break; - case TRAP_int3: - { -@@ -3127,6 +3122,11 @@ void vmx_vmexit_handler(struct cpu_user_ - - hvm_inject_page_fault(regs->error_code, exit_qualification); - break; -+ case TRAP_alignment_check: -+ HVMTRACE_1D(TRAP, vector); -+ __vmread(VM_EXIT_INTR_ERROR_CODE, &ecode); -+ hvm_inject_hw_exception(vector, ecode); -+ break; - case TRAP_nmi: - if ( MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK) != - X86_EVENTTYPE_NMI ) ---- a/xen/include/asm-x86/hvm/hvm.h -+++ b/xen/include/asm-x86/hvm/hvm.h -@@ -385,7 +385,10 @@ static inline int hvm_event_pending(stru - (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE)) - - /* These exceptions must always be intercepted. */ --#define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op)) -+#define HVM_TRAP_MASK ((1U << TRAP_debug) | \ -+ (1U << TRAP_invalid_op) | \ -+ (1U << TRAP_alignment_check) | \ -+ (1U << TRAP_machine_check)) - - /* - * x86 event types. This enumeration is valid for: diff -Nru xen-4.6.0/debian/patches/xsa158-fix.patch xen-4.6.5/debian/patches/xsa158-fix.patch --- xen-4.6.0/debian/patches/xsa158-fix.patch 2015-12-16 11:07:33.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa158-fix.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -memory: fix XSA-158 fix - -For one the uses of domu_max_order and ptdom_max_order were swapped. - -And then gcc warns about an unused result of a __must_check function -in the control part of a conditional expression when both other -expressions can be determined by the compiler to produce the same value -(see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68039), which happens -when HAS_PASSTHROUGH is undefined (i.e. 
for ARM on 4.4 and older). - -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -55,8 +55,6 @@ static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER; - static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER; - #ifdef HAS_PASSTHROUGH - static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER; --#else --# define ptdom_max_order domu_max_order - #endif - static void __init parse_max_order(const char *s) - { -@@ -75,8 +73,12 @@ custom_param("memop-max-order", parse_max_order); - - static unsigned int max_order(const struct domain *d) - { -- unsigned int order = cache_flush_permitted(d) ? domu_max_order -- : ptdom_max_order; -+ unsigned int order = domu_max_order; -+ -+#ifdef HAS_PASSTHROUGH -+ if ( cache_flush_permitted(d) && order < ptdom_max_order ) -+ order = ptdom_max_order; -+#endif - - if ( is_control_domain(d) && order < ctldom_max_order ) - order = ctldom_max_order; diff -Nru xen-4.6.0/debian/patches/xsa158.patch xen-4.6.5/debian/patches/xsa158.patch --- xen-4.6.0/debian/patches/xsa158.patch 2015-12-16 11:07:14.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa158.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,216 +0,0 @@ -memory: split and tighten maximum order permitted in memops - -Introduce and enforce separate limits for ordinary DomU, DomU with -pass-through device(s), control domain, and hardware domain. - -The DomU defaults were determined based on what so far was allowed by -multipage_allocation_permitted(). - -The x86 hwdom default was chosen based on linux-2.6.18-xen.hg c/s -1102:82782f1361a9 indicating 2Mb is not enough, plus some slack. - -The ARM hwdom default was chosen to allow 2Mb (order-9) mappings, plus -a little bit of slack. - -This is XSA-158. - -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell ---- -v2: Rename command line option to "memop-max-order". Clarify domain - kinds in command line option doc. Correct its syntax description. - ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1029,6 +1029,17 @@ with **crashinfo_maxaddr**. - Specify the threshold below which Xen will inform dom0 that the quantity of - free memory is getting low. Specifying `0` will disable this notification. - -+### memop-max-order -+> `= [][,[][,[][,]]]` -+ -+> x86 default: `9,18,12,12` -+> ARM default: `9,18,10,10` -+ -+Change the maximum order permitted for allocation (or allocation-like) -+requests issued by the various kinds of domains (in this order: -+ordinary DomU, control domain, hardware domain, and - when supported -+by the platform - DomU with pass-through device assigned). -+ - ### max\_cstate - > `= ` - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -43,6 +43,50 @@ struct memop_args { - int preempted; /* Was the hypercall preempted? 
*/ - }; - -+#ifndef CONFIG_CTLDOM_MAX_ORDER -+#define CONFIG_CTLDOM_MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER -+#endif -+#ifndef CONFIG_PTDOM_MAX_ORDER -+#define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER -+#endif -+ -+static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER; -+static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER; -+static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER; -+#ifdef HAS_PASSTHROUGH -+static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER; -+#else -+# define ptdom_max_order domu_max_order -+#endif -+static void __init parse_max_order(const char *s) -+{ -+ if ( *s != ',' ) -+ domu_max_order = simple_strtoul(s, &s, 0); -+ if ( *s == ',' && *++s != ',' ) -+ ctldom_max_order = simple_strtoul(s, &s, 0); -+ if ( *s == ',' && *++s != ',' ) -+ hwdom_max_order = simple_strtoul(s, &s, 0); -+#ifdef HAS_PASSTHROUGH -+ if ( *s == ',' && *++s != ',' ) -+ ptdom_max_order = simple_strtoul(s, &s, 0); -+#endif -+} -+custom_param("memop-max-order", parse_max_order); -+ -+static unsigned int max_order(const struct domain *d) -+{ -+ unsigned int order = cache_flush_permitted(d) ? domu_max_order -+ : ptdom_max_order; -+ -+ if ( is_control_domain(d) && order < ctldom_max_order ) -+ order = ctldom_max_order; -+ -+ if ( is_hardware_domain(d) && order < hwdom_max_order ) -+ order = hwdom_max_order; -+ -+ return min(order, MAX_ORDER + 0U); -+} -+ - static void increase_reservation(struct memop_args *a) - { - struct page_info *page; -@@ -55,7 +99,7 @@ static void increase_reservation(struct - a->nr_extents-1) ) - return; - -- if ( !multipage_allocation_permitted(current->domain, a->extent_order) ) -+ if ( a->extent_order > max_order(current->domain) ) - return; - - for ( i = a->nr_done; i < a->nr_extents; i++ ) -@@ -100,8 +144,8 @@ static void populate_physmap(struct memo - a->nr_extents-1) ) - return; - -- if ( a->memflags & MEMF_populate_on_demand ? a->extent_order > MAX_ORDER : -- !multipage_allocation_permitted(current->domain, a->extent_order) ) -+ if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER : -+ max_order(current->domain)) ) - return; - - for ( i = a->nr_done; i < a->nr_extents; i++ ) -@@ -285,7 +329,7 @@ static void decrease_reservation(struct - - if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, - a->nr_extents-1) || -- a->extent_order > MAX_ORDER ) -+ a->extent_order > max_order(current->domain) ) - return; - - for ( i = a->nr_done; i < a->nr_extents; i++ ) -@@ -343,13 +387,17 @@ static long memory_exchange(XEN_GUEST_HA - if ( copy_from_guest(&exch, arg, 1) ) - return -EFAULT; - -+ if ( max(exch.in.extent_order, exch.out.extent_order) > -+ max_order(current->domain) ) -+ { -+ rc = -EPERM; -+ goto fail_early; -+ } -+ - /* Various sanity checks. */ - if ( (exch.nr_exchanged > exch.in.nr_extents) || - /* Input and output domain identifiers match? */ - (exch.in.domid != exch.out.domid) || -- /* Extent orders are sensible? */ -- (exch.in.extent_order > MAX_ORDER) || -- (exch.out.extent_order > MAX_ORDER) || - /* Sizes of input and output lists do not overflow a long? */ - ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) || - ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) || -@@ -368,16 +416,6 @@ static long memory_exchange(XEN_GUEST_HA - goto fail_early; - } - -- /* Only privileged guests can allocate multi-page contiguous extents. 
*/ -- if ( !multipage_allocation_permitted(current->domain, -- exch.in.extent_order) || -- !multipage_allocation_permitted(current->domain, -- exch.out.extent_order) ) -- { -- rc = -EPERM; -- goto fail_early; -- } -- - if ( exch.in.extent_order <= exch.out.extent_order ) - { - in_chunk_order = exch.out.extent_order - exch.in.extent_order; ---- a/xen/include/asm-arm/config.h -+++ b/xen/include/asm-arm/config.h -@@ -39,6 +39,10 @@ - - #define CONFIG_IRQ_HAS_MULTIPLE_ACTION 1 - -+#define CONFIG_PAGEALLOC_MAX_ORDER 18 -+#define CONFIG_DOMU_MAX_ORDER 9 -+#define CONFIG_HWDOM_MAX_ORDER 10 -+ - #define OPT_CONSOLE_STR "dtuart" - - #ifdef MAX_PHYS_CPUS ---- a/xen/include/asm-arm/iocap.h -+++ b/xen/include/asm-arm/iocap.h -@@ -4,10 +4,6 @@ - #define cache_flush_permitted(d) \ - (!rangeset_is_empty((d)->iomem_caps)) - --#define multipage_allocation_permitted(d, order) \ -- (((order) <= 9) || /* allow 2MB superpages */ \ -- !rangeset_is_empty((d)->iomem_caps)) -- - #endif - - /* ---- a/xen/include/asm-x86/config.h -+++ b/xen/include/asm-x86/config.h -@@ -28,9 +28,12 @@ - #define CONFIG_NUMA 1 - #define CONFIG_DISCONTIGMEM 1 - #define CONFIG_NUMA_EMU 1 --#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER) - #define CONFIG_DOMAIN_PAGE 1 - -+#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER) -+#define CONFIG_DOMU_MAX_ORDER PAGETABLE_ORDER -+#define CONFIG_HWDOM_MAX_ORDER 12 -+ - /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */ - #define CONFIG_X86_L1_CACHE_SHIFT 7 - ---- a/xen/include/asm-x86/iocap.h -+++ b/xen/include/asm-x86/iocap.h -@@ -18,9 +18,4 @@ - (!rangeset_is_empty((d)->iomem_caps) || \ - !rangeset_is_empty((d)->arch.ioport_caps)) - --#define multipage_allocation_permitted(d, order) \ -- (((order) <= 9) || /* allow 2MB superpages */ \ -- !rangeset_is_empty((d)->iomem_caps) || \ -- !rangeset_is_empty((d)->arch.ioport_caps)) -- - #endif /* __X86_IOCAP_H__ */ diff -Nru xen-4.6.0/debian/patches/xsa159.patch xen-4.6.5/debian/patches/xsa159.patch --- xen-4.6.0/debian/patches/xsa159.patch 2015-12-16 11:08:45.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa159.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -memory: fix XENMEM_exchange error handling - -assign_pages() can fail due to the domain getting killed in parallel, -which should not result in a hypervisor crash. - -Also delete a redundant put_gfn() - all relevant paths leading to the -"fail" label already do this (and there are also paths where it was -plain wrong). All of the put_gfn()-s got introduced by 51032ca058 -("Modify naming of queries into the p2m"), including the otherwise -unneeded initializer for k (with even a kind of misleading comment - -the compiler warning could actually have served as a hint that the use -is wrong). - -This is XSA-159. - -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -334,7 +334,7 @@ static long memory_exchange(XEN_GUEST_HA - PAGE_LIST_HEAD(out_chunk_list); - unsigned long in_chunk_order, out_chunk_order; - xen_pfn_t gpfn, gmfn, mfn; -- unsigned long i, j, k = 0; /* gcc ... */ -+ unsigned long i, j, k; - unsigned int memflags = 0; - long rc = 0; - struct domain *d; -@@ -572,11 +572,12 @@ static long memory_exchange(XEN_GUEST_HA - fail: - /* Reassign any input pages we managed to steal. 
*/ - while ( (page = page_list_remove_head(&in_chunk_list)) ) -- { -- put_gfn(d, gmfn + k--); - if ( assign_pages(d, page, 0, MEMF_no_refcount) ) -- BUG(); -- } -+ { -+ BUG_ON(!d->is_dying); -+ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) -+ put_page(page); -+ } - - dying: - rcu_unlock_domain(d); diff -Nru xen-4.6.0/debian/patches/xsa160-4.6.patch xen-4.6.5/debian/patches/xsa160-4.6.patch --- xen-4.6.0/debian/patches/xsa160-4.6.patch 2015-12-16 11:08:59.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa160-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -From adcbd15b1aec8367f790774c998db199c9b577bf Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 18 Nov 2015 15:34:54 +0000 -Subject: [PATCH] libxl: Fix bootloader-related virtual memory leak on pv - build failure - -The bootloader may call libxl__file_reference_map(), which mmap's the -pv_kernel and pv_ramdisk into process memory. This was only unmapped, -however, on the success path of libxl__build_pv(). If there were a -failure anywhere between libxl_bootloader.c:parse_bootloader_result() -and the end of libxl__build_pv(), the calls to -libxl__file_reference_unmap() would be skipped, leaking the mapped -virtual memory. - -Ideally this would be fixed by adding the unmap calls to the -destruction path for libxl__domain_build_state. Unfortunately the -lifetime of the libxl__domain_build_state is opaque, and it doesn't -have a proper destruction path. But, the only thing in it that isn't -from the gc are these bootloader references, and they are only ever -set for one libxl__domain_build_state, the one which is -libxl__domain_create_state.build_state. - -So we can clean up in the exit path from libxl__domain_create_*, which -always comes through domcreate_complete. - -Remove the now-redundant unmaps in libxl__build_pv's success path. - -This is XSA-160. 
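The bug class XSA-160 describes is worth spelling out: a resource acquired early (here, an mmap of the kernel/ramdisk) is released only on the success path, so every intervening error return leaks it. A minimal sketch of the fixed shape, using hypothetical map_file()/unmap_file()/build_guest() helpers (not libxl's real API) and a single cleanup label:

    #include <stddef.h>

    struct mapping { void *ptr; size_t len; };

    /* Hypothetical stand-ins for libxl's file-reference machinery. */
    int map_file(const char *path, struct mapping *m);
    void unmap_file(struct mapping *m);    /* must tolerate a {NULL,0} mapping */
    int build_guest(struct mapping *kernel, struct mapping *ramdisk);

    int build_pv(const char *kpath, const char *rpath)
    {
        struct mapping kernel = { 0 }, ramdisk = { 0 };
        int rc;

        rc = map_file(kpath, &kernel);
        if (rc) goto out;
        rc = map_file(rpath, &ramdisk);
        if (rc) goto out;

        rc = build_guest(&kernel, &ramdisk);
        /* fall through: success and failure share one cleanup path */
    out:
        unmap_file(&ramdisk);
        unmap_file(&kernel);
        return rc;
    }

The actual fix takes the equivalent route of moving the unmaps to a point every domain-creation exit passes through (domcreate_complete), since libxl__domain_build_state has no destructor of its own.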
- -Acked-by: Ian Campbell ---- - tools/libxl/libxl_create.c | 3 +++ - tools/libxl/libxl_dom.c | 3 --- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c -index f5771da..278b9ed 100644 ---- a/tools/libxl/libxl_create.c -+++ b/tools/libxl/libxl_create.c -@@ -1484,6 +1484,9 @@ static void domcreate_complete(libxl__egc *egc, - libxl_domain_config *const d_config = dcs->guest_config; - libxl_domain_config *d_config_saved = &dcs->guest_config_saved; - -+ libxl__file_reference_unmap(&dcs->build_state.pv_kernel); -+ libxl__file_reference_unmap(&dcs->build_state.pv_ramdisk); -+ - if (!rc && d_config->b_info.exec_ssidref) - rc = xc_flask_relabel_domain(CTX->xch, dcs->guest_domid, d_config->b_info.exec_ssidref); - -diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c -index 8019f4e..2da3ac4 100644 ---- a/tools/libxl/libxl_dom.c -+++ b/tools/libxl/libxl_dom.c -@@ -750,9 +750,6 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid, - state->store_mfn = xc_dom_p2m_host(dom, dom->xenstore_pfn); - } - -- libxl__file_reference_unmap(&state->pv_kernel); -- libxl__file_reference_unmap(&state->pv_ramdisk); -- - ret = 0; - out: - xc_dom_release(dom); --- -1.7.10.4 - diff -Nru xen-4.6.0/debian/patches/xsa165-4.6.patch xen-4.6.5/debian/patches/xsa165-4.6.patch --- xen-4.6.0/debian/patches/xsa165-4.6.patch 2015-12-16 11:09:18.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa165-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,85 +0,0 @@ -x86: don't leak ST(n)/XMMn values to domains first using them - -FNINIT doesn't alter these registers, and hence using it is -insufficient to initialize a guest's initial state. - -This is XSA-165. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -851,6 +851,17 @@ int arch_set_info_guest( - if ( v->arch.xsave_area ) - v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; - } -+ else if ( v->arch.xsave_area ) -+ memset(&v->arch.xsave_area->xsave_hdr, 0, -+ sizeof(v->arch.xsave_area->xsave_hdr)); -+ else -+ { -+ typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt; -+ -+ memset(fpu_sse, 0, sizeof(*fpu_sse)); -+ fpu_sse->fcw = FCW_DEFAULT; -+ fpu_sse->mxcsr = MXCSR_DEFAULT; -+ } - - if ( !compat ) - { ---- a/xen/arch/x86/i387.c -+++ b/xen/arch/x86/i387.c -@@ -17,19 +17,6 @@ - #include - #include - --static void fpu_init(void) --{ -- unsigned long val; -- -- asm volatile ( "fninit" ); -- if ( cpu_has_xmm ) -- { -- /* load default value into MXCSR control/status register */ -- val = MXCSR_DEFAULT; -- asm volatile ( "ldmxcsr %0" : : "m" (val) ); -- } --} -- - /*******************************/ - /* FPU Restore Functions */ - /*******************************/ -@@ -248,15 +235,8 @@ void vcpu_restore_fpu_lazy(struct vcpu * - - if ( cpu_has_xsave ) - fpu_xrstor(v, XSTATE_LAZY); -- else if ( v->fpu_initialised ) -- { -- if ( cpu_has_fxsr ) -- fpu_fxrstor(v); -- else -- fpu_frstor(v); -- } - else -- fpu_init(); -+ fpu_fxrstor(v); - - v->fpu_initialised = 1; - v->fpu_dirtied = 1; -@@ -313,7 +293,14 @@ int vcpu_init_fpu(struct vcpu *v) - else - { - v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16); -- if ( !v->arch.fpu_ctxt ) -+ if ( v->arch.fpu_ctxt ) -+ { -+ typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt; -+ -+ fpu_sse->fcw = FCW_DEFAULT; -+ fpu_sse->mxcsr = MXCSR_DEFAULT; -+ } -+ else - rc = -ENOMEM; - } - diff -Nru xen-4.6.0/debian/patches/xsa166.patch 
xen-4.6.5/debian/patches/xsa166.patch --- xen-4.6.0/debian/patches/xsa166.patch 2015-12-16 11:09:30.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa166.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -x86/HVM: avoid reading ioreq state more than once - -Otherwise, especially when the compiler chooses to translate the -switch() to a jump table, unpredictable behavior (and in the jump table -case arbitrary code execution) can result. - -This is XSA-166. - -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -460,7 +460,10 @@ static bool_t hvm_wait_for_io(struct hvm - { - while ( sv->pending ) - { -- switch ( p->state ) -+ unsigned int state = p->state; -+ -+ rmb(); -+ switch ( state ) - { - case STATE_IOREQ_NONE: - /* -@@ -471,18 +474,15 @@ static bool_t hvm_wait_for_io(struct hvm - hvm_io_assist(sv, ~0ul); - break; - case STATE_IORESP_READY: /* IORESP_READY -> NONE */ -- rmb(); /* see IORESP_READY /then/ read contents of ioreq */ - p->state = STATE_IOREQ_NONE; - hvm_io_assist(sv, p->data); - break; - case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ - case STATE_IOREQ_INPROCESS: -- wait_on_xen_event_channel(sv->ioreq_evtchn, -- (p->state != STATE_IOREQ_READY) && -- (p->state != STATE_IOREQ_INPROCESS)); -+ wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state); - break; - default: -- gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state); -+ gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state); - sv->pending = 0; - domain_crash(sv->vcpu->domain); - return 0; /* bail */ diff -Nru xen-4.6.0/debian/patches/xsa167-4.6.patch xen-4.6.5/debian/patches/xsa167-4.6.patch --- xen-4.6.0/debian/patches/xsa167-4.6.patch 2016-01-22 08:58:40.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa167-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,77 +0,0 @@ -x86/mm: PV superpage handling lacks sanity checks - -MMUEXT_{,UN}MARK_SUPER fail to check the input MFN for validity before -dereferencing pointers into the superpage frame table. - -get_superpage() has a similar issue. - -This is XSA-167. 
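The XSA-166 hunk above, like RING_COPY_REQUEST in XSA-155, encodes a general rule for state shared with a less-privileged peer: fetch it exactly once into a local, place a read barrier after the fetch, and test only the local copy. Without that, the compiler may legally re-read p->state for each comparison, and if it emits a jump table the second read indexes the table with attacker-controlled data. A minimal sketch of the pattern, with generic names and __atomic_thread_fence standing in for Xen's rmb():

    #include <stdint.h>

    /* Stand-in for Xen's rmb(); any acquire fence suffices here. */
    #define read_barrier()  __atomic_thread_fence(__ATOMIC_ACQUIRE)

    enum { S_NONE, S_READY, S_INPROCESS, S_RESP_READY };

    struct shared { uint32_t state; uint64_t data; };  /* peer-writable */

    int pending(volatile struct shared *p)
    {
        uint32_t state = p->state;  /* exactly one fetch of shared state */

        read_barrier();             /* order it before dependent reads */

        switch (state) {            /* decide on the snapshot only; */
        case S_NONE:                /* never re-read p->state below */
        case S_RESP_READY:
            return 0;
        case S_READY:
        case S_INPROCESS:
            return 1;
        default:
            return -1;              /* corrupt value: fail safely */
        }
    }

The XSA-167 description that follows is the array-indexing cousin of the same discipline: validate an untrusted MFN against mfn_valid() before using it to compute a pointer into the superpage frame table.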
- -Reported-by: Qinghao Tang -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2624,6 +2624,9 @@ int get_superpage(unsigned long mfn, str - - ASSERT(opt_allow_superpage); - -+ if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) -+ return -EINVAL; -+ - spage = mfn_to_spage(mfn); - y = spage->type_info; - do { -@@ -3401,42 +3404,26 @@ long do_mmuext_op( - } - - case MMUEXT_MARK_SUPER: -+ case MMUEXT_UNMARK_SUPER: - { - unsigned long mfn = op.arg1.mfn; - -- if ( unlikely(d != pg_owner) ) -- rc = -EPERM; -- else if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) -- { -- MEM_LOG("Unaligned superpage reference mfn %lx", mfn); -- okay = 0; -- } -- else if ( !opt_allow_superpage ) -+ if ( !opt_allow_superpage ) - { - MEM_LOG("Superpages disallowed"); - rc = -ENOSYS; - } -- else -- rc = mark_superpage(mfn_to_spage(mfn), d); -- break; -- } -- -- case MMUEXT_UNMARK_SUPER: -- { -- unsigned long mfn = op.arg1.mfn; -- -- if ( unlikely(d != pg_owner) ) -+ else if ( unlikely(d != pg_owner) ) - rc = -EPERM; -- else if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) -+ else if ( mfn & (L1_PAGETABLE_ENTRIES - 1) ) - { - MEM_LOG("Unaligned superpage reference mfn %lx", mfn); -- okay = 0; -- } -- else if ( !opt_allow_superpage ) -- { -- MEM_LOG("Superpages disallowed"); -- rc = -ENOSYS; -+ rc = -EINVAL; - } -+ else if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) -+ rc = -EINVAL; -+ else if ( op.cmd == MMUEXT_MARK_SUPER ) -+ rc = mark_superpage(mfn_to_spage(mfn), d); - else - rc = unmark_superpage(mfn_to_spage(mfn)); - break; diff -Nru xen-4.6.0/debian/patches/xsa168.patch xen-4.6.5/debian/patches/xsa168.patch --- xen-4.6.0/debian/patches/xsa168.patch 2016-01-22 08:58:55.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa168.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -x86/VMX: prevent INVVPID failure due to non-canonical guest address - -While INVLPG (and on SVM INVLPGA) don't fault on non-canonical -addresses, INVVPID fails (in the "individual address" case) when passed -such an address. - -Since such intercepted INVLPG are effectively no-ops anyway, don't fix -this in vmx_invlpg_intercept(), but instead have paging_invlpg() never -return true in such a case. - -This is XSA-168. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Ian Campbell - ---- a/xen/include/asm-x86/paging.h -+++ b/xen/include/asm-x86/paging.h -@@ -245,7 +245,7 @@ paging_fault(unsigned long va, struct cp - * or 0 if it's safe not to do so. */ - static inline int paging_invlpg(struct vcpu *v, unsigned long va) - { -- return paging_get_hostmode(v)->invlpg(v, va); -+ return is_canonical_address(va) && paging_get_hostmode(v)->invlpg(v, va); - } - - /* Translate a guest virtual address to the frame number that the diff -Nru xen-4.6.0/debian/patches/xsa169.patch xen-4.6.5/debian/patches/xsa169.patch --- xen-4.6.0/debian/patches/xsa169.patch 2016-01-22 08:59:02.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa169.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -x86: make debug output consistent in hvm_set_callback_via - -The unconditional printks in the switch statement of the -hvm_set_callback_via function results in Xen log spam in non debug -versions of Xen. The printks are for debug output only so conditionally -compile the entire switch statement on debug versions of Xen only. - -This is XSA-169. 
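The is_canonical_address() test that the XSA-168 fix adds to paging_invlpg() is cheap: on x86-64 with 48 implemented virtual-address bits, an address is canonical iff bits 63..47 are all equal, i.e. iff sign-extending the low 48 bits reproduces the original value. A sketch, with VADDR_BITS fixed at 48 as an assumption (the shift trick relies on arithmetic right shift of signed values, which every relevant compiler provides):

    #include <stdbool.h>
    #include <stdint.h>

    #define VADDR_BITS 48   /* assumption: 4-level paging */

    static inline bool is_canonical(uint64_t va)
    {
        /* Shift the implemented bits to the top, sign-extend them back
         * down, and compare against the original value. */
        return (uint64_t)((int64_t)(va << (64 - VADDR_BITS)) >>
                          (64 - VADDR_BITS)) == va;
    }

    /* is_canonical(0x0000700000000000) -> true   (user-range address)
     * is_canonical(0xffff800000000000) -> true   (kernel-range address)
     * is_canonical(0x0000800000000000) -> false  (hole)               */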
- -Signed-off-by: Malcolm Crossley -Reviewed-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/xen/arch/x86/hvm/irq.c -+++ b/xen/arch/x86/hvm/irq.c -@@ -386,7 +386,8 @@ void hvm_set_callback_via(struct domain - - spin_unlock(&d->arch.hvm_domain.irq_lock); - -- dprintk(XENLOG_G_INFO, "Dom%u callback via changed to ", d->domain_id); -+#ifndef NDEBUG -+ printk(XENLOG_G_INFO "Dom%u callback via changed to ", d->domain_id); - switch ( via_type ) - { - case HVMIRQ_callback_gsi: -@@ -402,6 +403,7 @@ void hvm_set_callback_via(struct domain - printk("None\n"); - break; - } -+#endif - } - - struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v) diff -Nru xen-4.6.0/debian/patches/xsa170.patch xen-4.6.5/debian/patches/xsa170.patch --- xen-4.6.0/debian/patches/xsa170.patch 2016-02-16 14:31:04.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa170.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -x86/VMX: sanitize rIP before re-entering guest - -... to prevent guest user mode arranging for a guest crash (due to -failed VM entry). (On the AMD system I checked, hardware is doing -exactly the canonicalization being added here.) - -Note that fixing this in an architecturally correct way would be quite -a bit more involved: Making the x86 instruction emulator check all -branch targets for validity, plus dealing with invalid rIP resulting -from update_guest_eip() or incoming directly during a VM exit. The only -way to get the latter right would be by not having hardware do the -injection. - -Note further that there are a two early returns from -vmx_vmexit_handler(): One (through vmx_failed_vmentry()) leads to -domain_crash() anyway, and the other covers real mode only and can -neither occur with a non-canonical rIP nor result in an altered rIP, -so we don't need to force those paths through the checking logic. - -This is XSA-170. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Tested-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -2968,7 +2968,7 @@ static int vmx_handle_apic_write(void) - void vmx_vmexit_handler(struct cpu_user_regs *regs) - { - unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0; -- unsigned int vector = 0; -+ unsigned int vector = 0, mode; - struct vcpu *v = current; - - __vmread(GUEST_RIP, ®s->rip); -@@ -3566,6 +3566,41 @@ void vmx_vmexit_handler(struct cpu_user_ - out: - if ( nestedhvm_vcpu_in_guestmode(v) ) - nvmx_idtv_handling(); -+ -+ /* -+ * VM entry will fail (causing the guest to get crashed) if rIP (and -+ * rFLAGS, but we don't have an issue there) doesn't meet certain -+ * criteria. As we must not allow less than fully privileged mode to have -+ * such an effect on the domain, we correct rIP in that case (accepting -+ * this not being architecturally correct behavior, as the injected #GP -+ * fault will then not see the correct [invalid] return address). -+ * And since we know the guest will crash, we crash it right away if it -+ * already is in most privileged mode. -+ */ -+ mode = vmx_guest_x86_mode(v); -+ if ( mode == 8 ? !is_canonical_address(regs->rip) -+ : regs->rip != regs->_eip ) -+ { -+ struct segment_register ss; -+ -+ gprintk(XENLOG_WARNING, "Bad rIP %lx for mode %u\n", regs->rip, mode); -+ -+ vmx_get_segment_register(v, x86_seg_ss, &ss); -+ if ( ss.attr.fields.dpl ) -+ { -+ __vmread(VM_ENTRY_INTR_INFO, &intr_info); -+ if ( !(intr_info & INTR_INFO_VALID_MASK) ) -+ hvm_inject_hw_exception(TRAP_gp_fault, 0); -+ /* Need to fix rIP nevertheless. 
*/ -+ if ( mode == 8 ) -+ regs->rip = (long)(regs->rip << (64 - VADDR_BITS)) >> -+ (64 - VADDR_BITS); -+ else -+ regs->rip = regs->_eip; -+ } -+ else -+ domain_crash(v->domain); -+ } - } - - void vmx_vmenter_helper(const struct cpu_user_regs *regs) diff -Nru xen-4.6.0/debian/patches/xsa172.patch xen-4.6.5/debian/patches/xsa172.patch --- xen-4.6.0/debian/patches/xsa172.patch 2016-06-01 13:06:37.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa172.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -x86: fix information leak on AMD CPUs - -The fix for XSA-52 was wrong, and so was the change synchronizing that -new behavior to the FXRSTOR logic: AMD's manuals explictly state that -writes to the ES bit are ignored, and it instead gets calculated from -the exception and mask bits (it gets set whenever there is an unmasked -exception, and cleared otherwise). Hence we need to follow that model -in our workaround. - -This is XSA-172. - -The first hunk (xen/arch/x86/i387.c:fpu_fxrstor) is CVE-2016-3159. -The second hunk (xen/arch/x86/xstate.c:xrstor) is CVE-2016-3158. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/i387.c -+++ b/xen/arch/x86/i387.c -@@ -49,7 +49,7 @@ static inline void fpu_fxrstor(struct vc - * sometimes new user value. Both should be ok. Use the FPU saved - * data block as a safe address because it should be in L1. - */ -- if ( !(fpu_ctxt->fsw & 0x0080) && -+ if ( !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) - { - asm volatile ( "fnclex\n\t" ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -344,7 +344,7 @@ void xrstor(struct vcpu *v, uint64_t mas - * data block as a safe address because it should be in L1. - */ - if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) && -- !(ptr->fpu_sse.fsw & 0x0080) && -+ !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) - asm volatile ( "fnclex\n\t" /* clear exceptions */ - "ffree %%st(7)\n\t" /* clear stack tag */ diff -Nru xen-4.6.0/debian/patches/xsa173-4.6.patch xen-4.6.5/debian/patches/xsa173-4.6.patch --- xen-4.6.0/debian/patches/xsa173-4.6.patch 2016-06-01 13:07:14.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa173-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,244 +0,0 @@ -commit 54a4651cb4e744960fb375ed99909d7dfb943caf -Author: Tim Deegan -Date: Wed Mar 16 16:51:27 2016 +0000 - - x86: limit GFNs to 32 bits for shadowed superpages. - - Superpage shadows store the shadowed GFN in the backpointer field, - which for non-BIGMEM builds is 32 bits wide. Shadowing a superpage - mapping of a guest-physical address above 2^44 would lead to the GFN - being truncated there, and a crash when we come to remove the shadow - from the hash table. - - Track the valid width of a GFN for each guest, including reporting it - through CPUID, and enforce it in the shadow pagetables. Set the - maximum witth to 32 for guests where this truncation could occur. - - This is XSA-173. 
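The two XSA-172 hunks above change the predicate from "is the FSW.ES summary bit set" to "is any exception both pending and unmasked", computed as fsw & ~fcw & 0x3f. The distinction matters because on AMD CPUs writes to ES are ignored and the bit is derived from the flag and mask bits, so only the derived form matches what the hardware will actually report. A sketch of the predicate, with a hypothetical helper name:

    #include <stdbool.h>
    #include <stdint.h>

    /* FSW[5:0] = IE DE ZE OE UE PE pending-exception flags;
     * FCW[5:0] = the matching mask bits, where 1 = exception masked.
     * AMD derives FSW.ES from these, so testing ES directly (the
     * pre-XSA-172 "fsw & 0x0080") is unreliable there. */
    static inline bool fpu_unmasked_exception(uint16_t fsw, uint16_t fcw)
    {
        return (fsw & ~fcw & 0x003f) != 0;
    }

    /* fsw=0x0001 (IE pending), fcw=0x037f (all masked)  -> false
     * fsw=0x0001 (IE pending), fcw=0x037e (IE unmasked) -> true  */

Note also that the rIP sanitisation at the end of the XSA-170 hunk reuses the same sign-extension shift shown in the canonical-address sketch earlier, this time to force a bad rIP back into canonical form before injecting #GP.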
- - Signed-off-by: Tim Deegan - Signed-off-by: Jan Beulich - -Reported-by: Ling Liu -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 35ef21b..528c283 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -38,6 +38,7 @@ integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx); - const struct cpu_dev *__read_mostly cpu_devs[X86_VENDOR_NUM] = {}; - - unsigned int paddr_bits __read_mostly = 36; -+unsigned int hap_paddr_bits __read_mostly = 36; - - /* - * Default host IA32_CR_PAT value to cover all memory types. -@@ -211,7 +212,7 @@ static void __init early_cpu_detect(void) - - static void __cpuinit generic_identify(struct cpuinfo_x86 *c) - { -- u32 tfms, capability, excap, ebx; -+ u32 tfms, capability, excap, ebx, eax; - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, -@@ -248,8 +249,11 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) - } - if ( c->extended_cpuid_level >= 0x80000004 ) - get_model_name(c); /* Default name */ -- if ( c->extended_cpuid_level >= 0x80000008 ) -- paddr_bits = cpuid_eax(0x80000008) & 0xff; -+ if ( c->extended_cpuid_level >= 0x80000008 ) { -+ eax = cpuid_eax(0x80000008); -+ paddr_bits = eax & 0xff; -+ hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits; -+ } - } - - /* Might lift BIOS max_leaf=3 limit. */ -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index e200aab..0b4d9f0 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -4567,8 +4567,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, - break; - - case 0x80000008: -- count = cpuid_eax(0x80000008); -- count = (count >> 16) & 0xff ?: count & 0xff; -+ count = d->arch.paging.gfn_bits + PAGE_SHIFT; - if ( (*eax & 0xff) > count ) - *eax = (*eax & ~0xff) | count; - -diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c -index 773454d..06543d3 100644 ---- a/xen/arch/x86/mm/guest_walk.c -+++ b/xen/arch/x86/mm/guest_walk.c -@@ -93,6 +93,12 @@ void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn, - struct page_info *page; - void *map; - -+ if ( gfn_x(gfn) >> p2m->domain->arch.paging.gfn_bits ) -+ { -+ *rc = _PAGE_INVALID_BIT; -+ return NULL; -+ } -+ - /* Translate the gfn, unsharing if shared */ - page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), p2mt, NULL, - q); -@@ -326,20 +332,8 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, - flags &= ~_PAGE_PAT; - - if ( gfn_x(start) & GUEST_L2_GFN_MASK & ~0x1 ) -- { --#if GUEST_PAGING_LEVELS == 2 -- /* -- * Note that _PAGE_INVALID_BITS is zero in this case, yielding a -- * no-op here. -- * -- * Architecturally, the walk should fail if bit 21 is set (others -- * aren't being checked at least in PSE36 mode), but we'll ignore -- * this here in order to avoid specifying a non-natural, non-zero -- * _PAGE_INVALID_BITS value just for that case. -- */ --#endif - rc |= _PAGE_INVALID_BITS; -- } -+ - /* Increment the pfn by the right number of 4k pages. - * Mask out PAT and invalid bits. */ - start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) + -@@ -422,5 +416,11 @@ set_ad: - put_page(mfn_to_page(mfn_x(gw->l1mfn))); - } - -+ /* If this guest has a restricted physical address space then the -+ * target GFN must fit within it. 
*/ -+ if ( !(rc & _PAGE_PRESENT) -+ && gfn_x(guest_l1e_get_gfn(gw->l1e)) >> d->arch.paging.gfn_bits ) -+ rc |= _PAGE_INVALID_BITS; -+ - return rc; - } -diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c -index 6eb2167..f3475c6 100644 ---- a/xen/arch/x86/mm/hap/hap.c -+++ b/xen/arch/x86/mm/hap/hap.c -@@ -448,6 +448,8 @@ void hap_domain_init(struct domain *d) - { - INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); - -+ d->arch.paging.gfn_bits = hap_paddr_bits - PAGE_SHIFT; -+ - /* Use HAP logdirty mechanism. */ - paging_log_dirty_init(d, hap_enable_log_dirty, - hap_disable_log_dirty, -diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c -index bad8360..98d0d2c 100644 ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -51,6 +51,16 @@ int shadow_domain_init(struct domain *d, unsigned int domcr_flags) - INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist); - INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); - -+ d->arch.paging.gfn_bits = paddr_bits - PAGE_SHIFT; -+#ifndef CONFIG_BIGMEM -+ /* -+ * Shadowed superpages store GFNs in 32-bit page_info fields. -+ * Note that we cannot use guest_supports_superpages() here. -+ */ -+ if ( !is_pv_domain(d) || opt_allow_superpage ) -+ d->arch.paging.gfn_bits = 32; -+#endif -+ - /* Use shadow pagetables for log-dirty support */ - paging_log_dirty_init(d, sh_enable_log_dirty, - sh_disable_log_dirty, sh_clean_dirty_bitmap); -diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c -index 43c9488..71477fe 100644 ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -525,7 +525,8 @@ _sh_propagate(struct vcpu *v, - ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); - - /* Check there's something for the shadows to map to */ -- if ( !p2m_is_valid(p2mt) && !p2m_is_grant(p2mt) ) -+ if ( (!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) -+ || gfn_x(target_gfn) >> d->arch.paging.gfn_bits ) - { - *sp = shadow_l1e_empty(); - goto done; -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index c6c6e71..74c3a52 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -193,6 +193,9 @@ struct paging_domain { - /* log dirty support */ - struct log_dirty_domain log_dirty; - -+ /* Number of valid bits in a gfn. */ -+ unsigned int gfn_bits; -+ - /* preemption handling */ - struct { - const struct domain *dom; -diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h -index f8a0d76..b5db401 100644 ---- a/xen/include/asm-x86/guest_pt.h -+++ b/xen/include/asm-x86/guest_pt.h -@@ -210,15 +210,17 @@ guest_supports_nx(struct vcpu *v) - } - - --/* Some bits are invalid in any pagetable entry. */ --#if GUEST_PAGING_LEVELS == 2 --#define _PAGE_INVALID_BITS (0) --#elif GUEST_PAGING_LEVELS == 3 --#define _PAGE_INVALID_BITS \ -- get_pte_flags(((1ull<<63) - 1) & ~((1ull< -Date: Wed, 27 Apr 2016 15:50:01 +0100 -Subject: [PATCH 01/12] libxl: Record backend/frontend paths in /libxl/$DOMID - -This gives us a record of all the backends we have set up for a -domain, which is separate from the frontends in - /local/domain/$DOMID/device. - -In particular: - -1. A guest has write permission for the frontend path: - /local/domain/$DOMID/device/$KIND/$DEVID -which means that the guest can completely delete the frontend. -(They can't recreate it because they don't have write permission -on the containing directory.) - -2. 
A guest has write permission for the backend path recorded in the -frontend, ie, it can write to - /local/domain/$DOMID/device/$KIND/$DEVID/backend -which means that the guest can break the association between -frontend and backend. - -So we can't rely on iterating over the frontends to find all the -backends, or examining a frontend to discover how a device is -configured. - -So, have libxl__device_generic_add record the frontend and backend -paths in /libxl/$DOMID/device, and have libxl__device_destroy remove -them again. - -Create the containing directory /libxl/GUEST/device in -libxl__domain_make. The already existing xs_rm in devices_destroy_cb -will take care of removing it. - -This is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- -v2: Correct actual path computation (!) -v3: Correct actual path computation - really this time (!) - -Conflicts: - tools/libxl/libxl_create.c ---- - docs/misc/xenstore-paths.markdown | 15 +++++++++++++++ - tools/libxl/libxl_create.c | 2 ++ - tools/libxl/libxl_device.c | 36 ++++++++++++++++++++++++++++++++++-- - tools/libxl/libxl_internal.h | 1 + - 4 files changed, 52 insertions(+), 2 deletions(-) - -diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown -index d94ea9d..276273d 100644 ---- a/docs/misc/xenstore-paths.markdown -+++ b/docs/misc/xenstore-paths.markdown -@@ -389,6 +389,21 @@ The guest's virtual time offset from UTC in seconds. - - ### libxl Specific Paths - -+#### /libxl/$DOMID/device/$KIND/$DEVID -+ -+Created by libxl for every frontend/backend pair created for $DOMID. -+Used by libxl for enumeration and management of the device. -+ -+#### /libxl/$DOMID/device/$KIND/$DEVID/frontend -+ -+Path in xenstore to the frontend, normally -+/local/domain/$DOMID/device/$KIND/$DEVID -+ -+#### /libxl/$DOMID/device/$KIND/$DEVID/backend -+ -+Path in xenstore to the backend, normally -+/local/domain/$BACKEND_DOMID/backend/$KIND/$DOMID/$DEVID -+ - #### /libxl/$DOMID/dm-version ("qemu\_xen"|"qemu\_xen\_traditional") = [n,INTERNAL] - - The device model version for a domain. -diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c -index 350e274..c6862b8 100644 ---- a/tools/libxl/libxl_create.c -+++ b/tools/libxl/libxl_create.c -@@ -591,6 +591,8 @@ retry_transaction: - - xs_rm(ctx->xsh, t, libxl_path); - libxl__xs_mkdir(gc, t, libxl_path, noperm, ARRAY_SIZE(noperm)); -+ libxl__xs_mkdir(gc, t, GCSPRINTF("%s/device", libxl_path), -+ noperm, ARRAY_SIZE(noperm)); - - xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vm", dom_path), vm_path, strlen(vm_path)); - rc = libxl__domain_rename(gc, *domid, 0, info->name, t); -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index 8bb5e93..f1af411 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -40,6 +40,15 @@ char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device) - device->domid, device->devid); - } - -+char *libxl__device_libxl_path(libxl__gc *gc, libxl__device *device) -+{ -+ char *libxl_dom_path = libxl__xs_libxl_path(gc, device->domid); -+ -+ return GCSPRINTF("%s/device/%s/%d", libxl_dom_path, -+ libxl__device_kind_to_string(device->kind), -+ device->devid); -+} -+ - /* Returns 1 if device exists, 0 if not, ERROR_* (<0) on error. 
*/ - int libxl__device_exists(libxl__gc *gc, xs_transaction_t t, - libxl__device *device) -@@ -105,14 +114,16 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, - libxl__device *device, char **bents, char **fents, char **ro_fents) - { - libxl_ctx *ctx = libxl__gc_owner(gc); -- char *frontend_path, *backend_path; -+ char *frontend_path, *backend_path, *libxl_path; - struct xs_permissions frontend_perms[2]; - struct xs_permissions ro_frontend_perms[2]; - struct xs_permissions backend_perms[2]; - int create_transaction = t == XBT_NULL; -+ int rc; - - frontend_path = libxl__device_frontend_path(gc, device); - backend_path = libxl__device_backend_path(gc, device); -+ libxl_path = libxl__device_libxl_path(gc, device); - - frontend_perms[0].id = device->domid; - frontend_perms[0].perms = XS_PERM_NONE; -@@ -127,8 +138,22 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, - retry_transaction: - if (create_transaction) - t = xs_transaction_start(ctx->xsh); -+ - /* FIXME: read frontend_path and check state before removing stuff */ - -+ rc = libxl__xs_rm_checked(gc, t, libxl_path); -+ if (rc) goto out; -+ -+ rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/frontend",libxl_path), -+ frontend_path); -+ if (rc) goto out; -+ -+ rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/backend",libxl_path), -+ backend_path); -+ if (rc) goto out; -+ -+ /* xxx much of this function lacks error checks! */ -+ - if (fents || ro_fents) { - xs_rm(ctx->xsh, t, frontend_path); - xs_mkdir(ctx->xsh, t, frontend_path); -@@ -174,6 +199,11 @@ retry_transaction: - } - } - return 0; -+ -+ out: -+ if (create_transaction && t) -+ libxl__xs_transaction_abort(gc, &t); -+ return rc; - } - - typedef struct { -@@ -572,6 +602,7 @@ int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) - { - const char *be_path = libxl__device_backend_path(gc, dev); - const char *fe_path = libxl__device_frontend_path(gc, dev); -+ const char *libxl_path = libxl__device_libxl_path(gc, dev); - const char *tapdisk_path = GCSPRINTF("%s/%s", be_path, "tapdisk-params"); - const char *tapdisk_params; - xs_transaction_t t = 0; -@@ -592,9 +623,10 @@ int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) - if (domid == LIBXL_TOOLSTACK_DOMID) { - /* - * The toolstack domain is in charge of removing the -- * frontend path. -+ * frontend and libxl paths. 
- */ - libxl__xs_path_cleanup(gc, t, fe_path); -+ libxl__xs_path_cleanup(gc, t, libxl_path); - } - if (dev->backend_domid == domid) { - /* -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index 1699f32..38af3ac 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -1152,6 +1152,7 @@ _hidden int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, - libxl__device *device, char **bents, char **fents, char **ro_fents); - _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device); - _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device); -+_hidden char *libxl__device_libxl_path(libxl__gc *gc, libxl__device *device); - _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path, - libxl__device *dev); - _hidden int libxl__device_destroy(libxl__gc *gc, libxl__device *dev); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0002-libxl-Provide-libxl__backendpath_parse_domid.patch xen-4.6.5/debian/patches/xsa175-4.6-0002-libxl-Provide-libxl__backendpath_parse_domid.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0002-libxl-Provide-libxl__backendpath_parse_domid.patch 2016-06-01 13:09:58.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0002-libxl-Provide-libxl__backendpath_parse_domid.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -From fd2317e415eab76f20d11c6518fca9042fe117bc Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 27 Apr 2016 16:34:19 +0100 -Subject: [PATCH 02/12] libxl: Provide libxl__backendpath_parse_domid - -Multiple places in libxl need to figure out the backend domid of a -device. This can be discovered easily by looking at the backend path, -which always starts /local/domain/$backend_domid/. - -There are no call sites yet. - -This is part of XSA-175. 
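The parser this patch introduces (its body appears just below) hinges on a small sscanf() idiom: scanning "%u%c" and then insisting the trailing character is '/' rejects both truncated paths and ids followed by junk, so a malformed backend path fails closed instead of yielding a bogus domid. A standalone sketch of the same idiom, as a hypothetical variant of the function:

    #include <stdio.h>

    /* Returns 0 and fills *domid for a well-formed backend path,
     * -1 otherwise. */
    static int parse_backend_domid(const char *be_path, unsigned int *domid)
    {
        unsigned int id;
        char delim;

        if (sscanf(be_path, "/local/domain/%u%c", &id, &delim) != 2 ||
            delim != '/')
            return -1;              /* fail closed on malformed input */
        *domid = id;
        return 0;
    }

    /* "/local/domain/5/backend/vbd/7/51712" -> 0, domid 5
     * "/local/domain/5"                     -> -1 (nothing after the id)
     * "/local/domainX/5"                    -> -1 (prefix mismatch)    */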
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl_device.c | 15 +++++++++++++++ - tools/libxl/libxl_internal.h | 2 ++ - 2 files changed, 17 insertions(+) - -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index f1af411..bfd4bdd 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -288,6 +288,21 @@ static int disk_try_backend(disk_try_backend_args *a, - return 0; - } - -+int libxl__backendpath_parse_domid(libxl__gc *gc, const char *be_path, -+ libxl_domid *domid_out) { -+ int r; -+ unsigned int domid_sc; -+ char delim_sc; -+ -+ r = sscanf(be_path, "/local/domain/%u%c", &domid_sc, &delim_sc); -+ if (!(r==2 && delim_sc=='/')) { -+ LOG(ERROR, "internal error: backend path %s unparseable!", be_path); -+ return ERROR_FAIL; -+ } -+ *domid_out = domid_sc; -+ return 0; -+} -+ - int libxl__device_disk_set_backend(libxl__gc *gc, libxl_device_disk *disk) { - libxl_disk_backend ok; - disk_try_backend_args a; -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index 38af3ac..f034f63 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -683,6 +683,8 @@ _hidden bool libxl__xs_mkdir(libxl__gc *gc, xs_transaction_t t, - - _hidden char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid); - -+_hidden int libxl__backendpath_parse_domid(libxl__gc *gc, const char *be_path, -+ libxl_domid *domid_out); - - /*----- "checked" xenstore access functions -----*/ - /* Each of these functions will check that it succeeded; if it --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0003-libxl-Do-not-trust-frontend-in-libxl__devices_destro.patch xen-4.6.5/debian/patches/xsa175-4.6-0003-libxl-Do-not-trust-frontend-in-libxl__devices_destro.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0003-libxl-Do-not-trust-frontend-in-libxl__devices_destro.patch 2016-06-01 13:10:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0003-libxl-Do-not-trust-frontend-in-libxl__devices_destro.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,77 +0,0 @@ -From 5214e86aec5cce54f6c3621f487df3f1ecb32c52 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 18:39:36 +0100 -Subject: [PATCH 03/12] libxl: Do not trust frontend in libxl__devices_destroy - -We need to enumerate the devices we have provided to a domain, without -trusting the guest-writeable (or, at least, guest-deletable) frontend -paths. - -Instead, enumerate via, and read the backend path from, /libxl. - -The console /libxl path is regular, so the special case for console 0 -is not relevant any more: /libxl/GUEST/device/console/0 will be found, -and then libxl__device_destroy will DTRT to the right frontend path. - -This is part of XSA-175. 
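The enumeration this patch switches to can be illustrated outside libxl: list a domain's devices from the toolstack-owned /libxl tree, which the guest cannot modify, and read each recorded backend path from there. A minimal sketch against the raw libxenstore API (not libxl's internal wrappers), assuming the /libxl/$DOMID/device/$KIND/$DEVID/backend layout documented in patch 0001 above:

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenstore.h>

    static void list_vbd_backends(struct xs_handle *xsh, unsigned int domid)
    {
        char path[64], bpath[128], **devs, *be;
        unsigned int i, n;

        snprintf(path, sizeof(path), "/libxl/%u/device/vbd", domid);
        devs = xs_directory(xsh, XBT_NULL, path, &n);
        if (!devs)
            return;                       /* no devices recorded */

        for (i = 0; i < n; i++) {
            snprintf(bpath, sizeof(bpath), "%s/%s/backend", path, devs[i]);
            be = xs_read(xsh, XBT_NULL, bpath, NULL);
            if (be) {
                printf("vbd %s -> %s\n", devs[i], be);
                free(be);
            }
        }
        free(devs);
    }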
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl_device.c | 22 +++------------------- - 1 file changed, 3 insertions(+), 19 deletions(-) - -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index bfd4bdd..f4e907f 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -685,7 +685,7 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) - libxl__multidev_begin(ao, multidev); - multidev->callback = devices_remove_callback; - -- path = GCSPRINTF("/local/domain/%d/device", domid); -+ path = GCSPRINTF("/libxl/%d/device", domid); - kinds = libxl__xs_directory(gc, XBT_NULL, path, &num_kinds); - if (!kinds) { - if (errno != ENOENT) { -@@ -698,12 +698,12 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) - if (libxl__device_kind_from_string(kinds[i], &kind)) - continue; - -- path = GCSPRINTF("/local/domain/%d/device/%s", domid, kinds[i]); -+ path = GCSPRINTF("/libxl/%d/device/%s", domid, kinds[i]); - devs = libxl__xs_directory(gc, XBT_NULL, path, &num_dev_xsentries); - if (!devs) - continue; - for (j = 0; j < num_dev_xsentries; j++) { -- path = GCSPRINTF("/local/domain/%d/device/%s/%s/backend", -+ path = GCSPRINTF("/libxl/%d/device/%s/%s/backend", - domid, kinds[i], devs[j]); - path = libxl__xs_read(gc, XBT_NULL, path); - GCNEW(dev); -@@ -728,22 +728,6 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) - } - } - -- /* console 0 frontend directory is not under /local/domain//device */ -- path = GCSPRINTF("/local/domain/%d/console/backend", domid); -- path = libxl__xs_read(gc, XBT_NULL, path); -- GCNEW(dev); -- if (path && strcmp(path, "") && -- libxl__parse_backend_path(gc, path, dev) == 0) { -- dev->domid = domid; -- dev->kind = LIBXL__DEVICE_KIND_CONSOLE; -- dev->devid = 0; -- -- /* Currently console devices can be destroyed synchronously by just -- * removing xenstore entries, this is what libxl__device_destroy does. -- */ -- libxl__device_destroy(gc, dev); -- } -- - out: - libxl__multidev_prepared(egc, multidev, rc); - } --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0004-libxl-Do-not-trust-frontend-in-libxl__device_nextid.patch xen-4.6.5/debian/patches/xsa175-4.6-0004-libxl-Do-not-trust-frontend-in-libxl__device_nextid.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0004-libxl-Do-not-trust-frontend-in-libxl__device_nextid.patch 2016-06-01 13:10:58.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0004-libxl-Do-not-trust-frontend-in-libxl__device_nextid.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -From 31c78e76bdeb450ce0e0b32e9f5b0e284d4b9958 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 15:30:32 +0100 -Subject: [PATCH 04/12] libxl: Do not trust frontend in libxl__device_nextid - -When selecting the devid for a new device, we should look in -/libxl/device for existing devices, not in the frontend area. - -This is part of XSA-175. 
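Allocating the next devid then works on that same directory listing alone. libxl itself takes the last listed entry plus one; the sketch below is a defensive max-based variant that does not assume the listing is sorted, with devs/n as obtained from xs_directory() in the previous sketch (needs <stdlib.h> for atoi):

    /* Next free device id: one past the highest id present,
     * or 0 when the directory is empty. */
    static int next_devid(char **devs, unsigned int n)
    {
        int nextid = 0;
        unsigned int i;

        for (i = 0; i < n; i++) {
            int id = atoi(devs[i]) + 1;
            if (id > nextid)
                nextid = id;
        }
        return nextid;
    }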
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 1366177..96f68dc 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2008,15 +2008,16 @@ out: - /* common function to get next device id */ - static int libxl__device_nextid(libxl__gc *gc, uint32_t domid, char *device) - { -- char *dompath, **l; -+ char *libxl_dom_path, **l; - unsigned int nb; - int nextid = -1; - -- if (!(dompath = libxl__xs_get_dompath(gc, domid))) -+ if (!(libxl_dom_path = libxl__xs_libxl_path(gc, domid))) - return nextid; - - l = libxl__xs_directory(gc, XBT_NULL, -- GCSPRINTF("%s/device/%s", dompath, device), &nb); -+ GCSPRINTF("%s/device/%s", libxl_dom_path, device), -+ &nb); - if (l == NULL || nb == 0) - nextid = 0; - else --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0005-libxl-Do-not-trust-frontend-for-disk-eject-event.patch xen-4.6.5/debian/patches/xsa175-4.6-0005-libxl-Do-not-trust-frontend-for-disk-eject-event.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0005-libxl-Do-not-trust-frontend-for-disk-eject-event.patch 2016-06-01 13:11:23.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0005-libxl-Do-not-trust-frontend-for-disk-eject-event.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -From f02320d65226f722bf8eaaa6bf6e0148d633965a Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 27 Apr 2016 16:08:49 +0100 -Subject: [PATCH 05/12] libxl: Do not trust frontend for disk eject event - -Use the /libxl path for interpreting disk eject watch events: do not -read the backend path out of the frontend. Instead, use the version -in /libxl. That avoids us relying on the guest-modifiable -$frontend/backend pointer. - -To implement this we store the path - /libxl/$guest/device/vbd/$devid/backend -in the evgen structure. - -This is part of XSA-175. 
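The structural point of the eject fix just described generalises: compute the trusted backend-pointer path once, at event-enable time, from toolstack-owned data, and store it beside the watch so the callback never re-derives it from the guest-writable frontend. A compressed sketch with hypothetical types mirroring evg->be_ptr_path:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct evgen {
        char *watch_path;    /* node watched for eject writes */
        char *be_ptr_path;   /* trusted /libxl node naming the backend */
    };

    static struct evgen *evgen_new(unsigned int domid, int devid)
    {
        char buf[96];
        struct evgen *evg = calloc(1, sizeof(*evg));

        if (!evg)
            return NULL;
        snprintf(buf, sizeof(buf),
                 "/local/domain/%u/device/vbd/%d/eject", domid, devid);
        evg->watch_path = strdup(buf);
        snprintf(buf, sizeof(buf),
                 "/libxl/%u/device/vbd/%d/backend", domid, devid);
        evg->be_ptr_path = strdup(buf);
        if (!evg->watch_path || !evg->be_ptr_path) {
            free(evg->watch_path);
            free(evg->be_ptr_path);
            free(evg);
            return NULL;
        }
        return evg;
    }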
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 28 ++++++++++++++++++++++------ - tools/libxl/libxl_internal.h | 2 +- - 2 files changed, 23 insertions(+), 7 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 96f68dc..831b80c 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -1336,9 +1336,10 @@ static void disk_eject_xswatch_callback(libxl__egc *egc, libxl__ev_xswatch *w, - const char *wpath, const char *epath) { - EGC_GC; - libxl_evgen_disk_eject *evg = (void*)w; -- char *backend; -+ const char *backend; - char *value; - char backend_type[BACKEND_STRING_SIZE+1]; -+ int rc; - - value = libxl__xs_read(gc, XBT_NULL, wpath); - -@@ -1354,9 +1355,16 @@ static void disk_eject_xswatch_callback(libxl__egc *egc, libxl__ev_xswatch *w, - libxl_event *ev = NEW_EVENT(egc, DISK_EJECT, evg->domid, evg->user); - libxl_device_disk *disk = &ev->u.disk_eject.disk; - -- backend = libxl__xs_read(gc, XBT_NULL, -- libxl__sprintf(gc, "%.*s/backend", -- (int)strlen(wpath)-6, wpath)); -+ rc = libxl__xs_read_checked(gc, XBT_NULL, evg->be_ptr_path, &backend); -+ if (rc) { -+ LIBXL__EVENT_DISASTER(egc, "xs_read failed reading be_ptr_path", -+ errno, LIBXL_EVENT_TYPE_DISK_EJECT); -+ return; -+ } -+ if (!backend) { -+ /* device has been removed, not simply ejected */ -+ return; -+ } - - sscanf(backend, - "/local/domain/%d/backend/%" TOSTRING(BACKEND_STRING_SIZE) -@@ -1405,11 +1413,18 @@ int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t guest_domid, - if (!domid) - domid = guest_domid; - -- path = libxl__sprintf(gc, "%s/device/vbd/%d/eject", -+ int devid = libxl__device_disk_dev_number(vdev, NULL, NULL); -+ -+ path = GCSPRINTF("%s/device/vbd/%d/eject", - libxl__xs_get_dompath(gc, domid), -- libxl__device_disk_dev_number(vdev, NULL, NULL)); -+ devid); - if (!path) { rc = ERROR_NOMEM; goto out; } - -+ const char *libxl_path = GCSPRINTF("%s/device/vbd/%d", -+ libxl__xs_libxl_path(gc, domid), -+ devid); -+ evg->be_ptr_path = libxl__sprintf(NOGC, "%s/backend", libxl_path); -+ - rc = libxl__ev_xswatch_register(gc, &evg->watch, - disk_eject_xswatch_callback, path); - if (rc) goto out; -@@ -1436,6 +1451,7 @@ void libxl__evdisable_disk_eject(libxl__gc *gc, libxl_evgen_disk_eject *evg) { - libxl__ev_xswatch_deregister(gc, &evg->watch); - - free(evg->vdev); -+ free(evg->be_ptr_path); - free(evg); - - CTX_UNLOCK; -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index f034f63..1f11b1a 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -337,7 +337,7 @@ struct libxl__evgen_disk_eject { - uint32_t domid; - LIBXL_LIST_ENTRY(libxl_evgen_disk_eject) entry; - libxl_ev_user user; -- char *vdev; -+ char *vdev, *be_ptr_path; - }; - _hidden void - libxl__evdisable_disk_eject(libxl__gc*, libxl_evgen_disk_eject*); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0006-libxl-Do-not-trust-frontend-for-disk-in-getinfo.patch xen-4.6.5/debian/patches/xsa175-4.6-0006-libxl-Do-not-trust-frontend-for-disk-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0006-libxl-Do-not-trust-frontend-for-disk-in-getinfo.patch 2016-06-01 13:11:53.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0006-libxl-Do-not-trust-frontend-for-disk-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ -From d3e1c77a8ae7820e0906f324254b551995624ab5 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 19:21:51 +0100 -Subject: [PATCH 06/12] libxl: Do not trust frontend for disk in getinfo - -* 
Rename the frontend variable to `fe_path' to check we caught them all -* Read the backend path from /libxl, rather than from the frontend -* Parse the backend domid from the backend path, rather than reading it - from the frontend (and add the appropriate error path and initialisation) - -This is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 27 +++++++++++++++++++-------- - 1 file changed, 19 insertions(+), 8 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 831b80c..b418564 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2821,27 +2821,34 @@ int libxl_device_disk_getinfo(libxl_ctx *ctx, uint32_t domid, - libxl_device_disk *disk, libxl_diskinfo *diskinfo) - { - GC_INIT(ctx); -- char *dompath, *diskpath; -+ char *dompath, *fe_path, *libxl_path; - char *val; -+ int rc; -+ -+ diskinfo->backend = NULL; - - dompath = libxl__xs_get_dompath(gc, domid); - diskinfo->devid = libxl__device_disk_dev_number(disk->vdev, NULL, NULL); - - /* tap devices entries in xenstore are written as vbd devices. */ -- diskpath = libxl__sprintf(gc, "%s/device/vbd/%d", dompath, diskinfo->devid); -+ fe_path = GCSPRINTF("%s/device/vbd/%d", dompath, diskinfo->devid); -+ libxl_path = GCSPRINTF("%s/device/vbd/%d", -+ libxl__xs_libxl_path(gc, domid), diskinfo->devid); - diskinfo->backend = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/backend", diskpath), NULL); -+ GCSPRINTF("%s/backend", libxl_path), NULL); - if (!diskinfo->backend) { - GC_FREE; - return ERROR_FAIL; - } -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/backend-id", diskpath)); -- diskinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/state", diskpath)); -+ rc = libxl__backendpath_parse_domid(gc, diskinfo->backend, -+ &diskinfo->backend_id); -+ if (rc) goto out; -+ -+ val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", fe_path)); - diskinfo->state = val ? strtoul(val, NULL, 10) : -1; -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/event-channel", diskpath)); -+ val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/event-channel", fe_path)); - diskinfo->evtch = val ? strtoul(val, NULL, 10) : -1; -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/ring-ref", diskpath)); -+ val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/ring-ref", fe_path)); - diskinfo->rref = val ? 
strtoul(val, NULL, 10) : -1; - diskinfo->frontend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/frontend", diskinfo->backend), NULL); -@@ -2850,6 +2857,10 @@ int libxl_device_disk_getinfo(libxl_ctx *ctx, uint32_t domid, - - GC_FREE; - return 0; -+ -+ out: -+ free(diskinfo->backend); -+ return rc; - } - - int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0007-libxl-Do-not-trust-frontend-for-vtpm-list.patch xen-4.6.5/debian/patches/xsa175-4.6-0007-libxl-Do-not-trust-frontend-for-vtpm-list.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0007-libxl-Do-not-trust-frontend-for-vtpm-list.patch 2016-06-01 13:12:18.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0007-libxl-Do-not-trust-frontend-for-vtpm-list.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -From c277f046e8d5c8413d6436685dbda75c5626d577 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 15:58:32 +0100 -Subject: [PATCH 07/12] libxl: Do not trust frontend for vtpm list - -libxl_device_vtpm_list needs to enumerate and identify devices without -trusting frontend-controlled data. So - -* Use the /libxl path to enumerate vtpms. -* Use the /libxl path to find the corresponding backends. -* Parse the backend path to find the backend domid. - -This is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index b418564..6590157 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2196,14 +2196,15 @@ libxl_device_vtpm *libxl_device_vtpm_list(libxl_ctx *ctx, uint32_t domid, int *n - GC_INIT(ctx); - - libxl_device_vtpm* vtpms = NULL; -- char* fe_path = NULL; -+ char *libxl_path; - char** dir = NULL; - unsigned int ndirs = 0; -+ int rc; - - *num = 0; - -- fe_path = libxl__sprintf(gc, "%s/device/vtpm", libxl__xs_get_dompath(gc, domid)); -- dir = libxl__xs_directory(gc, XBT_NULL, fe_path, &ndirs); -+ libxl_path = GCSPRINTF("%s/device/vtpm", libxl__xs_libxl_path(gc, domid)); -+ dir = libxl__xs_directory(gc, XBT_NULL, libxl_path, &ndirs); - if (dir && ndirs) { - vtpms = malloc(sizeof(*vtpms) * ndirs); - libxl_device_vtpm* vtpm; -@@ -2212,16 +2213,15 @@ libxl_device_vtpm *libxl_device_vtpm_list(libxl_ctx *ctx, uint32_t domid, int *n - char* tmp; - const char* be_path = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/%s/backend", -- fe_path, *dir)); -+ libxl_path, *dir)); - - libxl_device_vtpm_init(vtpm); - - vtpm->devid = atoi(*dir); - -- tmp = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/%s/backend-id", -- fe_path, *dir)); -- vtpm->backend_domid = atoi(tmp); -+ rc = libxl__backendpath_parse_domid(gc, be_path, -+ &vtpm->backend_domid); -+ if (rc) return NULL; - - tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/uuid", be_path)); - if (tmp) { --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0008-libxl-Do-not-trust-frontend-for-vtpm-in-getinfo.patch xen-4.6.5/debian/patches/xsa175-4.6-0008-libxl-Do-not-trust-frontend-for-vtpm-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0008-libxl-Do-not-trust-frontend-for-vtpm-in-getinfo.patch 2016-06-01 13:12:56.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0008-libxl-Do-not-trust-frontend-for-vtpm-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -From d36cdc6238506f65ca9ce2bb008c61e45dbcb9d0 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 
16:00:20 +0100 -Subject: [PATCH 08/12] libxl: Do not trust frontend for vtpm in getinfo - -libxl_device_vtpm_getinfo needs to examine devices without trusting -frontend-controlled data. So: - -* Use /libxl to find the backend path. -* Parse the backend path to find the backend domid, rather than - reading it from the frontend. - -This is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 6590157..a424832 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2245,7 +2245,7 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - libxl_vtpminfo *vtpminfo) - { - GC_INIT(ctx); -- char *dompath, *vtpmpath; -+ char *libxl_path, *dompath, *vtpmpath; - char *val; - int rc = 0; - -@@ -2254,8 +2254,10 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - vtpminfo->devid = vtpm->devid; - - vtpmpath = GCSPRINTF("%s/device/vtpm/%d", dompath, vtpminfo->devid); -+ libxl_path = GCSPRINTF("%s/device/vtpm/%d", -+ libxl__xs_libxl_path(gc, domid), vtpminfo->devid); - vtpminfo->backend = xs_read(ctx->xsh, XBT_NULL, -- GCSPRINTF("%s/backend", vtpmpath), NULL); -+ GCSPRINTF("%s/backend", libxl_path), NULL); - if (!vtpminfo->backend) { - goto err; - } -@@ -2263,9 +2265,9 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - goto err; - } - -- val = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/backend-id", vtpmpath)); -- vtpminfo->backend_id = val ? strtoul(val, NULL, 10) : -1; -+ rc = libxl__backendpath_parse_domid(gc, vtpminfo->backend, -+ &vtpminfo->backend_id); -+ if (rc) goto exit; - - val = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/state", vtpmpath)); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0009-libxl-Do-not-trust-frontend-for-nic-in-libxl_devid_t.patch xen-4.6.5/debian/patches/xsa175-4.6-0009-libxl-Do-not-trust-frontend-for-nic-in-libxl_devid_t.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0009-libxl-Do-not-trust-frontend-for-nic-in-libxl_devid_t.patch 2016-06-01 13:13:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0009-libxl-Do-not-trust-frontend-for-nic-in-libxl_devid_t.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,47 +0,0 @@ -From 3e1e9ff62717f4c9b231e82a299e960bd4da4506 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 15:52:53 +0100 -Subject: [PATCH 09/12] libxl: Do not trust frontend for nic in - libxl_devid_to_device_nic - -Find the backend by reading the pointer in /libxl rather than in the -guest's frontend area. - -This is part of XSA-175. 
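The vtpm change above replaces the frontend's backend-id node with a domid parsed out of the backend path itself. A sketch of that parse, mirroring the role of the libxl__backendpath_parse_domid call visible in the hunk (the real helper may differ in details):

    #include <stdio.h>

    static int backendpath_parse_domid(const char *be_path, unsigned *domid_r)
    {
        unsigned domid;
        /* A canonical backend path starts "/local/domain/$DOMID/". */
        if (sscanf(be_path, "/local/domain/%u/", &domid) != 1)
            return -1;                  /* malformed backend path */
        *domid_r = domid;
        return 0;
    }

    int main(void)
    {
        unsigned domid;
        if (backendpath_parse_domid("/local/domain/3/backend/vtpm/7/0",
                                    &domid) == 0)
            printf("backend domid: %u\n", domid);
        return 0;
    }

The point of parsing rather than reading backend-id is that the path itself was written by the toolstack, whereas the frontend's backend-id node sits in a guest-writeable directory.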
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index a424832..225e659 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3512,17 +3512,17 @@ int libxl_devid_to_device_nic(libxl_ctx *ctx, uint32_t domid, - int devid, libxl_device_nic *nic) - { - GC_INIT(ctx); -- char *dompath, *path; -+ char *libxl_dom_path, *path; - int rc = ERROR_FAIL; - - libxl_device_nic_init(nic); -- dompath = libxl__xs_get_dompath(gc, domid); -- if (!dompath) -+ libxl_dom_path = libxl__xs_libxl_path(gc, domid); -+ if (!libxl_dom_path) - goto out; - - path = libxl__xs_read(gc, XBT_NULL, -- libxl__sprintf(gc, "%s/device/vif/%d/backend", -- dompath, devid)); -+ GCSPRINTF("%s/device/vif/%d/backend", libxl_dom_path, -+ devid)); - if (!path) - goto out; - --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0010-libxl-Do-not-trust-frontend-for-nic-in-getinfo.patch xen-4.6.5/debian/patches/xsa175-4.6-0010-libxl-Do-not-trust-frontend-for-nic-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0010-libxl-Do-not-trust-frontend-for-nic-in-getinfo.patch 2016-06-01 13:13:49.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0010-libxl-Do-not-trust-frontend-for-nic-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -From 4bb4bb97faacc6135fb0640be9da10c9f2dc5592 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 16:31:07 +0100 -Subject: [PATCH 10/12] libxl: Do not trust frontend for nic in getinfo - -libxl_device_nic_getinfo needs to examine devices without trusting -frontend-controlled data. So: - -* Use /libxl to find the backend path. -* Parse the backend path to find the backend domid, rather than - reading it from the frontend. - -This is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 225e659..105a9cc 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3601,22 +3601,27 @@ int libxl_device_nic_getinfo(libxl_ctx *ctx, uint32_t domid, - libxl_device_nic *nic, libxl_nicinfo *nicinfo) - { - GC_INIT(ctx); -- char *dompath, *nicpath; -+ char *dompath, *nicpath, *libxl_path; - char *val; -+ int rc; - - dompath = libxl__xs_get_dompath(gc, domid); - nicinfo->devid = nic->devid; - -- nicpath = libxl__sprintf(gc, "%s/device/vif/%d", dompath, nicinfo->devid); -+ nicpath = GCSPRINTF("%s/device/vif/%d", dompath, nicinfo->devid); -+ libxl_path = GCSPRINTF("%s/device/vif/%d", -+ libxl__xs_libxl_path(gc, domid), nicinfo->devid); - nicinfo->backend = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/backend", nicpath), NULL); -+ GCSPRINTF("%s/backend", libxl_path), NULL); - if (!nicinfo->backend) { - GC_FREE; - return ERROR_FAIL; - } -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/backend-id", nicpath)); -- nicinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/state", nicpath)); -+ rc = libxl__backendpath_parse_domid(gc, nicinfo->backend, -+ &nicinfo->backend_id); -+ if (rc) goto out; -+ -+ val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", nicpath)); - nicinfo->state = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/event-channel", nicpath)); - nicinfo->evtch = val ? 
strtoul(val, NULL, 10) : -1; -@@ -3629,8 +3634,10 @@ int libxl_device_nic_getinfo(libxl_ctx *ctx, uint32_t domid, - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend-id", nicinfo->backend)); - nicinfo->frontend_id = val ? strtoul(val, NULL, 10) : -1; - -+ rc = 0; -+ out: - GC_FREE; -- return 0; -+ return rc; - } - - const char *libxl__device_nic_devname(libxl__gc *gc, --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0011-libxl-Do-not-trust-frontend-for-channel-in-list.patch xen-4.6.5/debian/patches/xsa175-4.6-0011-libxl-Do-not-trust-frontend-for-channel-in-list.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0011-libxl-Do-not-trust-frontend-for-channel-in-list.patch 2016-06-01 13:14:19.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0011-libxl-Do-not-trust-frontend-for-channel-in-list.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -From 57c13510f626dc46b568e07d47870f86acb5c9ad Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 17:01:56 +0100 -Subject: [PATCH 11/12] libxl: Do not trust frontend for channel in list - -libxl_device_channel_list should not trust frontend-provided data. - -So it needs to iterate using the /libxl paths, and read the backend -path out of /libxl. - -However, it also filters out pure "consoles", which are channels -without a "name". But the name was stored only in the frontend -directory, which the frontend can delete. - -So store the name in the backend too. (Ideally we would store it in -/libxl, where the backend can't write to it either, but -libxl__device_console_add does not currently have access to the xenstore -transaction used by libxl__device_generic_add. Protection against the -backend will come later, in XSA-178.) - -Because the libxl paths are defined to be in terms of the frontend -device types, not the backend device types, it is no longer correct -for libxl__append_channel_list to take a type argument. Abolish this -(with no functional effect). - -This is part of XSA-175.
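In sketch form, the filter described above enumerates consoles from the toolstack-owned tree and treats any entry without a name as a plain console rather than a channel. The static table stands in for the xenstore directory listing and name reads, and the sample names are illustrative only:

    #include <stdio.h>

    struct console_entry {
        int devid;
        const char *name;       /* NULL: plain console, not a channel */
    };

    int main(void)
    {
        const struct console_entry entries[] = {
            { 0, NULL },                      /* primary console, no name */
            { 1, "org.qemu.guest_agent.0" },
            { 2, "mgmt" },
        };
        unsigned i;
        for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
            if (!entries[i].name)
                continue;       /* console without a name: skip */
            printf("channel devid=%d name=%s\n",
                   entries[i].devid, entries[i].name);
        }
        return 0;
    }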
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 25 ++++++++++++++----------- - 1 file changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 105a9cc..48d491f 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3698,6 +3698,8 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - if (console->name) { - flexarray_append(ro_front, "name"); - flexarray_append(ro_front, console->name); -+ flexarray_append(back, "name"); -+ flexarray_append(back, console->name); - } - if (console->connection) { - flexarray_append(back, "connection"); -@@ -3836,34 +3838,35 @@ static int libxl__device_channel_from_xs_be(libxl__gc *gc, - return rc; - } - --static int libxl__append_channel_list_of_type(libxl__gc *gc, -+static int libxl__append_channel_list(libxl__gc *gc, - uint32_t domid, -- const char *type, - libxl_device_channel **channels, - int *nchannels) - { -- char *fe_path = NULL, *be_path = NULL; -+ char *libxl_dir_path = NULL, *be_path = NULL; - char **dir = NULL; - unsigned int n = 0, devid = 0; - libxl_device_channel *next = NULL; - int rc = 0, i; - -- fe_path = GCSPRINTF("%s/device/%s", -- libxl__xs_get_dompath(gc, domid), type); -- dir = libxl__xs_directory(gc, XBT_NULL, fe_path, &n); -+ libxl_dir_path = GCSPRINTF("%s/device/console", -+ libxl__xs_libxl_path(gc, domid)); -+ dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); - if (!dir || !n) - goto out; - - for (i = 0; i < n; i++) { -- const char *p, *name; -+ const char *libxl_path, *name; - libxl_device_channel *tmp; - -- p = libxl__sprintf(gc, "%s/%s", fe_path, dir[i]); -- name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", p)); -+ libxl_path = GCSPRINTF("%s/%s", libxl_dir_path, dir[i]); -+ be_path = libxl__xs_read(gc, XBT_NULL, -+ GCSPRINTF("%s/backend", libxl_path)); -+ if (!be_path) continue; -+ name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", be_path)); - /* 'channels' are consoles with names, so ignore all consoles - without names */ - if (!name) continue; -- be_path = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/backend", p)); - tmp = realloc(*channels, - sizeof(libxl_device_channel) * (*nchannels + devid + 1)); - if (!tmp) { -@@ -3894,7 +3897,7 @@ libxl_device_channel *libxl_device_channel_list(libxl_ctx *ctx, - - *num = 0; - -- rc = libxl__append_channel_list_of_type(gc, domid, "console", &channels, num); -+ rc = libxl__append_channel_list(gc, domid, &channels, num); - if (rc) goto out_err; - - GC_FREE; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0012-libxl-Do-not-trust-frontend-for-channel-in-getinfo.patch xen-4.6.5/debian/patches/xsa175-4.6-0012-libxl-Do-not-trust-frontend-for-channel-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0012-libxl-Do-not-trust-frontend-for-channel-in-getinfo.patch 2016-06-01 13:14:43.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0012-libxl-Do-not-trust-frontend-for-channel-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,121 +0,0 @@ -From 1a83149ba5f2a7faed29d45b5d6dddfff78459bc Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 17:24:32 +0100 -Subject: [PATCH 12/12] libxl: Do not trust frontend for channel in getinfo - -libxl_device_channel_getinfo needs to examine devices without trusting -frontend-controlled data. So: - -* Use /libxl to find the backend path. -* Parse the backend path to find the backend domid, rather than - reading it from the frontend. -* Tolerate FRONTEND/tty vanishing. 
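The last item, tolerating FRONTEND/tty vanishing, amounts to substituting a harmless placeholder whenever the guest has deleted its frontend subtree. A stand-alone sketch (read_node is a stand-in for the xenstore read; the fallback string matches the one the hunk below installs):

    #include <stdio.h>

    /* Stand-in for a xenstore read; returns NULL to simulate the
     * guest having deleted its frontend subtree. */
    static const char *read_node(const char *path)
    {
        (void)path;
        return NULL;
    }

    int main(void)
    {
        const char *val = read_node("/local/domain/7/device/console/1/tty");
        if (!val)
            val = "/NO-SUCH-PATH";
        printf("pty path: %s\n", val);
        return 0;
    }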
- -Note that there is a strange off-by-one error in the computation of -both fe_path and libxl_path in libxl_device_channel_getinfo: the -incoming channel->devid, which is copied to channelinfo->devid, has +1 -applied to calculate the frontend path (and, after this patch, the -libxl path). I.e., the devid passed to libxl_device_channel_getinfo -must be one less than the actual devid for the device being asked -about. - -This is actually a bug which mirrors a bug in -libxl__append_channel_list, which fills in the devids of the channel -devices it finds with sequentially increasing numbers starting at 0. - -In the usual case channels have real devids starting at 1 (because -there is the console, which is devid 0, but not a channel). So these -bugs usually cancel out. - -We do not address this problem at this time. This bug does not have -any security implications. - -This patch is part of XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 44 ++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 36 insertions(+), 8 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 48d491f..db92fae 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3918,23 +3918,28 @@ int libxl_device_channel_getinfo(libxl_ctx *ctx, uint32_t domid, - libxl_channelinfo *channelinfo) - { - GC_INIT(ctx); -- char *dompath, *fe_path; -+ char *dompath, *fe_path, *libxl_path; - char *val; -+ int rc; - - dompath = libxl__xs_get_dompath(gc, domid); - channelinfo->devid = channel->devid; - -- fe_path = libxl__sprintf(gc, "%s/device/console/%d", dompath, -- channelinfo->devid + 1); -+ fe_path = GCSPRINTF("%s/device/console/%d", dompath, -+ channelinfo->devid + 1); -+ libxl_path = GCSPRINTF("%s/device/console/%d", -+ libxl__xs_libxl_path(gc, domid), -+ channelinfo->devid + 1); - channelinfo->backend = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/backend", -- fe_path), NULL); -+ GCSPRINTF("%s/backend", libxl_path), NULL); - if (!channelinfo->backend) { - GC_FREE; - return ERROR_FAIL; - } -- val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/backend-id", fe_path)); -- channelinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; -+ rc = libxl__backendpath_parse_domid(gc, channelinfo->backend, -+ &channelinfo->backend_id); -+ if (rc) goto out; -+ - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", fe_path)); - channelinfo->state = val ? strtoul(val, NULL, 10) : -1; - channelinfo->frontend = xs_read(ctx->xsh, XBT_NULL, -@@ -3952,13 +3957,36 @@ int libxl_device_channel_getinfo(libxl_ctx *ctx, uint32_t domid, - switch (channel->connection) { - case LIBXL_CHANNEL_CONNECTION_PTY: - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/tty", fe_path)); -+ /* -+ * It is obviously very wrong for this value to be in the -+ * frontend. But in XSA-175 we don't want to re-engineer -+ * this because other xenconsole code elsewhere (some -+ * even out of tree, perhaps) expects this node to be -+ * here. -+ * -+ * FE/pty is readonly for the guest. It always exists if -+ * FE does because libxl__device_console_add -+ * unconditionally creates it and nothing deletes it. -+ * -+ * The guest can delete the whole FE (which it has write -+ * privilege on) but the containing directories -+ * /local/GUEST[/device[/console]] are also RO for the -+ * guest. So if the guest deletes FE it cannot recreate -+ * it. -+ * -+ * Therefore the guest cannot cause FE/pty to contain bad -+ * data, although it can cause it to not exist. 
-+ */ -+ if (!val) val = "/NO-SUCH-PATH"; - channelinfo->u.pty.path = strdup(val); - break; - default: - break; - } -+ rc = 0; -+ out: - GC_FREE; -- return 0; -+ return rc; - } - - /******************************************************************************/ --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0013-libxl-Cleanup-Have-libxl__alloc_vdev-use-libxl.patch xen-4.6.5/debian/patches/xsa175-4.6-0013-libxl-Cleanup-Have-libxl__alloc_vdev-use-libxl.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0013-libxl-Cleanup-Have-libxl__alloc_vdev-use-libxl.patch 2016-06-07 14:23:11.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0013-libxl-Cleanup-Have-libxl__alloc_vdev-use-libxl.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -From 55636dd778b92325e6f34685e825ff6f08b5cb07 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 15:25:19 +0100 -Subject: [PATCH 1/3] libxl: Cleanup: Have libxl__alloc_vdev use /libxl - -When allocating a vdev for a new disk, look in /libxl/device, rather -than the frontends directory in xenstore. - -This is more in line with the other parts of libxl, which ought not to -trust frontends. In this case, though, there is no security bug prior -to this patch because the frontend is the toolstack domain itself. - -If libxl__alloc_vdev were ever changed to take a frontend domain -argument, this patch will fix a latent security bug. - -This is a followup to XSA-175. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 41d125f..bfd7df9 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3068,7 +3068,7 @@ static char * libxl__alloc_vdev(libxl__gc *gc, void *get_vdev_user, - { - const char *blkdev_start = (const char *) get_vdev_user; - int devid = 0, disk = 0, part = 0; -- char *dompath = libxl__xs_get_dompath(gc, LIBXL_TOOLSTACK_DOMID); -+ char *libxl_dom_path = libxl__xs_libxl_path(gc, LIBXL_TOOLSTACK_DOMID); - - libxl__device_disk_dev_number(blkdev_start, &disk, &part); - if (part != 0) { -@@ -3083,7 +3083,7 @@ static char * libxl__alloc_vdev(libxl__gc *gc, void *get_vdev_user, - return NULL; - if (libxl__xs_read(gc, t, - libxl__sprintf(gc, "%s/device/vbd/%d/backend", -- dompath, devid)) == NULL) { -+ libxl_dom_path, devid)) == NULL) { - if (errno == ENOENT) - return libxl__devid_to_localdev(gc, devid); - else --- -1.9.1 - diff -Nru xen-4.6.0/debian/patches/xsa175-4.6-0014-libxl-Document-serial-correctly.patch xen-4.6.5/debian/patches/xsa175-4.6-0014-libxl-Document-serial-correctly.patch --- xen-4.6.0/debian/patches/xsa175-4.6-0014-libxl-Document-serial-correctly.patch 2016-06-07 14:23:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa175-4.6-0014-libxl-Document-serial-correctly.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -From 44d8545ba68800e067a0a25d6d0e236758c59771 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 15:17:45 +0100 -Subject: [PATCH 3/3] libxl: Document ~/serial/ correctly - -xenstore-paths.markdown talked about ~/device/serial/, but that's not -used. - -(It is very wrong for this value, which contains a driver domain -filesystem path, to be in the guest's area of xenstore. However, it -is only ever created by libxl and read by xenconsoled. When it is -created, it inherits the read-only permissions of /local/domain/DOMID. -So there is no security bug.) - -This is a followup to XSA-175.
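Looking back at the libxl__alloc_vdev hunk above: it probes successive disk devids in the toolstack-owned tree and takes the first one with no backend node. A much-simplified sketch (the real code derives the starting devid from blkdev_start and runs inside a xenstore transaction; backend_node_exists is a stand-in for the libxl__xs_read probe):

    #include <stdio.h>

    /* Stand-in for reading /libxl/0/device/vbd/$devid/backend;
     * pretend the first two devids are already taken. */
    static int backend_node_exists(int devid)
    {
        return devid < 2;
    }

    int main(void)
    {
        int devid;
        for (devid = 0; backend_node_exists(devid); devid++)
            ;
        printf("first free vbd devid: %d\n", devid);
        return 0;
    }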
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - docs/misc/xenstore-paths.markdown | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown -index 8c686ec..bfa6a79 100644 ---- a/docs/misc/xenstore-paths.markdown -+++ b/docs/misc/xenstore-paths.markdown -@@ -240,7 +240,7 @@ The primary PV console device. Described in [console.txt](console.txt) - - A secondary PV console device. Described in [console.txt](console.txt) - --#### ~/device/serial/$DEVID/* [HVM] -+#### ~/serial/$DEVID/* [HVM] - - An emulated serial device. Described in [console.txt](console.txt) - --- -1.9.1 - diff -Nru xen-4.6.0/debian/patches/xsa176.patch xen-4.6.5/debian/patches/xsa176.patch --- xen-4.6.0/debian/patches/xsa176.patch 2016-06-01 13:15:17.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa176.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -x86/mm: fully honor PS bits in guest page table walks - -In L4 entries it is currently unconditionally reserved (and hence -should, when set, always result in a reserved bit page fault), and is -reserved on hardware not supporting 1Gb pages (and hence should, when -set, similarly cause a reserved bit page fault on such hardware). - -This is CVE-2016-4480 / XSA-176. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Tested-by: Andrew Cooper - ---- a/xen/arch/x86/mm/guest_walk.c -+++ b/xen/arch/x86/mm/guest_walk.c -@@ -226,6 +226,11 @@ guest_walk_tables(struct vcpu *v, struct - rc |= _PAGE_PRESENT; - goto out; - } -+ if ( gflags & _PAGE_PSE ) -+ { -+ rc |= _PAGE_PSE | _PAGE_INVALID_BIT; -+ goto out; -+ } - rc |= ((gflags & mflags) ^ mflags); - - /* Map the l3 table */ -@@ -247,7 +252,7 @@ guest_walk_tables(struct vcpu *v, struct - } - rc |= ((gflags & mflags) ^ mflags); - -- pse1G = (gflags & _PAGE_PSE) && guest_supports_1G_superpages(v); -+ pse1G = !!(gflags & _PAGE_PSE); - - if ( pse1G ) - { -@@ -267,6 +272,8 @@ guest_walk_tables(struct vcpu *v, struct - /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ - flags &= ~_PAGE_PAT; - -+ if ( !guest_supports_1G_superpages(v) ) -+ rc |= _PAGE_PSE | _PAGE_INVALID_BIT; - if ( gfn_x(start) & GUEST_L3_GFN_MASK & ~0x1 ) - rc |= _PAGE_INVALID_BITS; - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0001-libxl-Make-copy-of-every-xs-backend-in-libxl-in-_gen.patch xen-4.6.5/debian/patches/xsa178-unstable-0001-libxl-Make-copy-of-every-xs-backend-in-libxl-in-_gen.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0001-libxl-Make-copy-of-every-xs-backend-in-libxl-in-_gen.patch 2016-06-01 13:16:43.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0001-libxl-Make-copy-of-every-xs-backend-in-libxl-in-_gen.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -From d0712483981daf5a748c1cd083fe61d8d9ea8102 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 16:19:28 +0100 -Subject: [PATCH 01/21] libxl: Make copy of every xs backend in /libxl in - _generic_add - -We want to stop libxl trustingly reading information from the backend -directory (since this is, of course, writeable by the backend, which -might be a semi-trusted driver domain). - -In principle it is wrong in current libxl for anything to try to -divine virtual device configuration from xenstore: the JSON domain -config ought to supply that, and xenstore should only tell us which -devices actually exist. - -However: - -Firstly, there are several existing places where configuration -information is retrieved from xenstore rather than JSON. 
We do not -want to re-engineer this in a security patch. - -Secondly, we want to make a security patch which can be backported to -versions of libxl without the JSON configuration machinery. - -So we take the expedient approach of keeping a copy of the -configuration somewhere we trust, namely /libxl. This is obviously -fairly low-risk, although it does write significantly more keys in -xenstore. - -In this patch we make this change in libxl__device_generic_add. This -is responsible for actually writing the vast majority of device -information to xenstore. There are a few loose ends which will be -dealt with in a moment. - -Likewise, changes to readers to use the new location will appear in -further patches. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - docs/misc/xenstore-paths.markdown | 4 ++++ - tools/libxl/libxl_device.c | 23 +++++++++++++++++++++++ - 2 files changed, 27 insertions(+) - -diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown -index 2f545c1..261ee42 100644 ---- a/docs/misc/xenstore-paths.markdown -+++ b/docs/misc/xenstore-paths.markdown -@@ -549,6 +549,10 @@ Path in xenstore to the frontend, normally - - Path in xenstore to the backend, normally - /local/domain/$BACKEND_DOMID/backend/$KIND/$DOMID/$DEVID - -+#### /libxl/$DOMID/device/$KIND/$DEVID/$NODE -+ -+Trustworthy copy of /local/domain/$DOMID/backend/$KIND/$DEVID/$NODE. -+ - #### /libxl/$DOMID/dm-version ("qemu\_xen"|"qemu\_xen\_traditional") = [n,INTERNAL] - - The device model version for a domain. -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index 16384f8..4b61b4c 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -185,6 +185,29 @@ retry_transaction: - xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), - frontend_path, strlen(frontend_path)); - libxl__xs_writev(gc, t, backend_path, bents); -+ -+ /* -+ * We make a copy of everything for the backend in the libxl -+ * path as well. This means we don't need to trust the -+ * backend. Ideally this information would not be used and we -+ * would use the information from the json configuration -+ * instead. But there are still places in libxl that try to -+ * reconstruct a config from xenstore. -+ * -+ * This duplication will typically produce duplicate keys -+ * which will go out of date, but that's OK because nothing -+ * reads those. For example, there is usually -+ * /libxl/$guest/device/$kind/$devid/state -+ * which starts out containing XenbusStateInitialising ("1") -+ * just like the copy in -+ * /local/domain/$driverdom/backend/$guest/$kind/$devid/state -+ * but which won't ever be updated. -+ * -+ * This duplication is superfluous and messy but as discussed -+ * the proper fix is more intrusive than we want to do now.
-+ */ -+ rc = libxl__xs_writev(gc, t, libxl_path, bents); -+ if (rc) goto out; - } - - if (!create_transaction) --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0002-libxl-Do-not-trust-backend-in-libxl__device_exists.patch xen-4.6.5/debian/patches/xsa178-unstable-0002-libxl-Do-not-trust-backend-in-libxl__device_exists.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0002-libxl-Do-not-trust-backend-in-libxl__device_exists.patch 2016-06-01 13:17:16.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0002-libxl-Do-not-trust-backend-in-libxl__device_exists.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -From 24a66d54e50f93059c37a62c40cb957324196cd2 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 15:04:35 +0100 -Subject: [PATCH 02/21] libxl: Do not trust backend in libxl__device_exists - -To determine whether a device is supposed to exist, look in /libxl, -rather than the backend. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl_device.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index 4b61b4c..4717027 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -54,7 +54,7 @@ int libxl__device_exists(libxl__gc *gc, xs_transaction_t t, - libxl__device *device) - { - int rc; -- char *be_path = libxl__device_backend_path(gc, device); -+ char *be_path = libxl__device_libxl_path(gc, device); - const char *dir; - - rc = libxl__xs_read_checked(gc, t, be_path, &dir); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0003-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-excep.patch xen-4.6.5/debian/patches/xsa178-unstable-0003-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-excep.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0003-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-excep.patch 2016-06-01 13:17:50.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0003-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-excep.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -From 985fbad8be66189670570dbb7cac2bb606d82ed0 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 17:18:44 +0100 -Subject: [PATCH 03/21] libxl: Do not trust backend for vtpm in getinfo (except - uuid) - -* Do not check the backend for existence. We have already read the - /libxl path so know that the vtpm exists (or is supposed to); if the - backend doesn't exist then that must be the backend's doing. -* Get the frontend path from the /libxl directory. -* The frontend domid is the guest domid, and does not need to be read - from xenstore (!) - -We still attempt to read the uuid from the backend. This will be -fixed in the next patch. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 75539e9..3dd47ec 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2241,9 +2241,6 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - if (!vtpminfo->backend) { - goto err; - } -- if(!libxl__xs_read(gc, XBT_NULL, vtpminfo->backend)) { -- goto err; -- } - - rc = libxl__backendpath_parse_domid(gc, vtpminfo->backend, - &vtpminfo->backend_id); -@@ -2262,11 +2259,8 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - vtpminfo->rref = val ? 
strtoul(val, NULL, 10) : -1; - - vtpminfo->frontend = xs_read(ctx->xsh, XBT_NULL, -- GCSPRINTF("%s/frontend", vtpminfo->backend), NULL); -- -- val = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/frontend-id", vtpminfo->backend)); -- vtpminfo->frontend_id = val ? strtoul(val, NULL, 10) : -1; -+ GCSPRINTF("%s/frontend", libxl_path), NULL); -+ vtpminfo->frontend_id = domid; - - val = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/uuid", vtpminfo->backend)); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0004-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-uuid.patch xen-4.6.5/debian/patches/xsa178-unstable-0004-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-uuid.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0004-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-uuid.patch 2016-06-01 13:18:25.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0004-libxl-Do-not-trust-backend-for-vtpm-in-getinfo-uuid.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -From 4831002680a39e327a105402f0162e3d57c9d86f Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 16:57:14 +0100 -Subject: [PATCH 04/21] libxl: Do not trust backend for vtpm in getinfo (uuid) - -Use uuid from /libxl, rather than from backend. I think the backend -is not supposed to change the uuid, since it seems to be set by libxl -during setup. - -If in fact the backend is supposed to be able to change the uuid, this -patch needs to be dropped and replaced by a patch which makes the vtpm -uuid lookup tolerate bad or missing data. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 3dd47ec..a1cc220 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2203,7 +2203,7 @@ libxl_device_vtpm *libxl_device_vtpm_list(libxl_ctx *ctx, uint32_t domid, int *n - &vtpm->backend_domid); - if (rc) return NULL; - -- tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/uuid", be_path)); -+ tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/uuid", libxl_path)); - if (tmp) { - if(libxl_uuid_from_string(&(vtpm->uuid), tmp)) { - LOG(ERROR, "%s/uuid is a malformed uuid?? (%s) Probably a bug!!\n", be_path, tmp); -@@ -2263,7 +2263,7 @@ int libxl_device_vtpm_getinfo(libxl_ctx *ctx, - vtpminfo->frontend_id = domid; - - val = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/uuid", vtpminfo->backend)); -+ GCSPRINTF("%s/uuid", libxl_path)); - if(val == NULL) { - LOG(ERROR, "%s/uuid does not exist!", vtpminfo->backend); - goto err; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0005-libxl-cdrom-eject-and-insert-write-to-libxl.patch xen-4.6.5/debian/patches/xsa178-unstable-0005-libxl-cdrom-eject-and-insert-write-to-libxl.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0005-libxl-cdrom-eject-and-insert-write-to-libxl.patch 2016-06-01 13:18:54.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0005-libxl-cdrom-eject-and-insert-write-to-libxl.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -From d637ba5a50f3c654f5caa7c251e623cace1640f8 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 19:15:13 +0100 -Subject: [PATCH 05/21] libxl: cdrom eject and insert: write to /libxl - -Copy the new type and params values to /libxl, so that the information -in /libxl is kept up to date. 
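In sketch form, the change writes one key/value list twice: once to the backend directory as before, and once to the /libxl copy (write_kvs is a stand-in for libxl's transactional xenstore write; the paths and values are illustrative):

    #include <stdio.h>

    /* Stand-in for a transactional xenstore write of a kv list. */
    static int write_kvs(const char *dir, const char *const kvs[])
    {
        int i;
        for (i = 0; kvs[i]; i += 2)
            printf("write %s/%s = %s\n", dir, kvs[i], kvs[i + 1]);
        return 0;
    }

    int main(void)
    {
        const char *const kvs[] =
            { "type", "phy", "params", "/dev/loop0", NULL };

        /* the backend directory, as before ... */
        if (write_kvs("/local/domain/0/backend/vbd/7/5632", kvs)) return 1;
        /* ... and the toolstack-owned mirror */
        if (write_kvs("/libxl/7/device/vbd/5632", kvs)) return 1;
        return 0;
    }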
- -This is needed so that we can return this trustworthy information, -rather than trusting the backend-writeable parts of xenstore. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index a1cc220..39a2e03 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2896,7 +2896,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, - libxl_domain_config d_config; - int rc, dm_ver; - libxl__device device; -- const char * path; -+ const char *path, *libxl_path; - char * tmp; - libxl__domain_userdata_lock *lock = NULL; - xs_transaction_t t = XBT_NULL; -@@ -2970,6 +2970,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, - if (rc) goto out; - - path = libxl__device_backend_path(gc, &device); -+ libxl_path = libxl__device_libxl_path(gc, &device); - - insert = flexarray_make(gc, 4, 1); - -@@ -3018,8 +3019,12 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, - goto out; - } - -- rc = libxl__xs_writev(gc, t, path, -- libxl__xs_kvs_of_flexarray(gc, empty, empty->count)); -+ char **kvs = libxl__xs_kvs_of_flexarray(gc, empty, empty->count); -+ -+ rc = libxl__xs_writev(gc, t, path, kvs); -+ if (rc) goto out; -+ -+ rc = libxl__xs_writev(gc, t, libxl_path, kvs); - if (rc) goto out; - - rc = libxl__xs_transaction_commit(gc, &t); -@@ -3056,8 +3061,12 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, - rc = libxl__set_domain_configuration(gc, domid, &d_config); - if (rc) goto out; - -- rc = libxl__xs_writev(gc, t, path, -- libxl__xs_kvs_of_flexarray(gc, insert, insert->count)); -+ char **kvs = libxl__xs_kvs_of_flexarray(gc, insert, insert->count); -+ -+ rc = libxl__xs_writev(gc, t, path, kvs); -+ if (rc) goto out; -+ -+ rc = libxl__xs_writev(gc, t, libxl_path, kvs); - if (rc) goto out; - - rc = libxl__xs_transaction_commit(gc, &t); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0006-libxl-Do-not-trust-backend-for-disk-eject-vdev.patch xen-4.6.5/debian/patches/xsa178-unstable-0006-libxl-Do-not-trust-backend-for-disk-eject-vdev.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0006-libxl-Do-not-trust-backend-for-disk-eject-vdev.patch 2016-06-01 13:27:28.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0006-libxl-Do-not-trust-backend-for-disk-eject-vdev.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -From a38b1590b8d05f18f37755e981edd4cb51f1098d Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 16:23:35 +0100 -Subject: [PATCH 06/21] libxl: Do not trust backend for disk eject vdev - -For disk eject, use configured vdev from /libxl, not backend. - -The backend directory is writeable by driver domains. This means that -a malicious driver domain could cause libxl to see a wrong vdev, -confusing the user or the toolstack. - -Use the vdev from the /libxl space, rather than the backend. - -For convenience, we read the vdev from the /libxl space into the evg -during setup and copy it on each event, rather than reading it afresh -each time (which would in any case involve generating or saving a copy -of the relevant /libxl path). - -This is part of XSA-178. 
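A sketch of that caching (the struct layout and reader below are illustrative, not the real libxl_evgen_disk_eject):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct evgen_disk_eject {
        unsigned domid;
        char *vdev;             /* cached from /libxl at registration */
    };

    /* Stand-in for reading /libxl/$domid/device/vbd/$devid/vdev. */
    static char *read_configured_vdev(unsigned domid, int devid)
    {
        (void)domid; (void)devid;
        return strdup("hdc");
    }

    int main(void)
    {
        struct evgen_disk_eject evg = { 7, NULL };
        evg.vdev = read_configured_vdev(evg.domid, 5632);
        if (!evg.vdev) return 1;

        /* On each eject event: copy the cached value instead of
         * re-reading anything the backend could have rewritten. */
        char *reported = strdup(evg.vdev);
        printf("eject: domain %u, vdev %s\n", evg.domid, reported);
        free(reported);
        free(evg.vdev);
        return 0;
    }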
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-01 15:20:46.742244957 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-01 15:27:19.758238898 +0200 -@@ -1381,8 +1381,7 @@ static void disk_eject_xswatch_callback( - disk->pdev_path = strdup(""); /* xxx fixme malloc failure */ - disk->format = LIBXL_DISK_FORMAT_EMPTY; - /* this value is returned to the user: do not free right away */ -- disk->vdev = xs_read(CTX->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/dev", backend), NULL); -+ disk->vdev = libxl__strdup(NOGC, evg->vdev); - disk->removable = 1; - disk->readwrite = 0; - disk->is_cdrom = 1; -@@ -1405,9 +1404,6 @@ int libxl_evenable_disk_eject(libxl_ctx - evg->domid = guest_domid; - LIBXL_LIST_INSERT_HEAD(&CTX->disk_eject_evgens, evg, entry); - -- evg->vdev = strdup(vdev); -- if (!evg->vdev) { rc = ERROR_NOMEM; goto out; } -- - uint32_t domid = libxl_get_stubdom_id(ctx, guest_domid); - - if (!domid) - domid = guest_domid; -@@ -1425,6 +1421,13 @@ int libxl_evenable_disk_eject(libxl_ctx - devid); - evg->be_ptr_path = libxl__sprintf(NOGC, "%s/backend", libxl_path); - -+ const char *configured_vdev; -+ rc = libxl__xs_read_checked(gc, XBT_NULL, -+ GCSPRINTF("%s/vdev", libxl_path), &configured_vdev); -+ if (rc) goto out; -+ -+ evg->vdev = libxl__strdup(NOGC, configured_vdev); -+ - rc = libxl__ev_xswatch_register(gc, &evg->watch, - disk_eject_xswatch_callback, path); - if (rc) goto out; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0007-libxl-Do-not-trust-backend-for-disk-fix-driver-domai.patch xen-4.6.5/debian/patches/xsa178-unstable-0007-libxl-Do-not-trust-backend-for-disk-fix-driver-domai.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0007-libxl-Do-not-trust-backend-for-disk-fix-driver-domai.patch 2016-06-01 18:02:40.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0007-libxl-Do-not-trust-backend-for-disk-fix-driver-domai.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,242 +0,0 @@ -From 7f08021df653dbc44898c70eacff143363d8cc5d Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 18:29:45 +0100 -Subject: [PATCH 07/21] libxl: Do not trust backend for disk; fix driver domain - disks list - -Rework libxl__device_disk_from_xs_be (which takes a backend path) into -libxl__device_disk_from_xenstore (which takes a libxl path). - -libxl__device_disk_from_xenstore now finds the backend path itself, -although it doesn't use it any more for most of its functions. We -rename the variable from be_path to backend_path to make sure we -didn't miss any cases. - -All the data collection is now done by reading from the copy in -/libxl. - -libxl_device_disk_list and its helper libxl__append_disk_list (which -used to be libxl__append_disk_list_of_type) need extensive rework, -because they now need to specify the /libxl path rather than the -backend path. - -To do that they enumerate disks by looking in the appropriate area in -/libxl. Previously they scanned various of the backend directories in -dom0 (which was broken for driver domains). It is no longer necessary -to enumerate the various disk backends, because they all use the same -paths in /devices. libxl__device_disk_from_xenstore will parse the -type out of the backend path, for itself.
(Indeed, it did so before - -the now-gone type parameter to libxl__append_disk_list_of_type wasn't -used other than to construct the directory to list.) - -Finally, remove a redundant store to pdisk->backend_domid in -libxl__append_disk_list[_of_type]. Even before this commit, that -store was not needed because libxl_device_disk_init (called by -libxl__device_disk_from_xenstore) would zero it. Now it overwrites -the correct backend domid with zero; so remove it. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- -v2: Also fix up COLO reads, following rebase ---- - tools/libxl/libxl.c | 94 +++++++++++++++++++++++++++-------------------------- - 1 file changed, 48 insertions(+), 46 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-01 16:24:28.198186039 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-01 20:02:36.161984251 +0200 -@@ -2642,8 +2642,8 @@ void libxl__device_disk_add(libxl__egc * - device_disk_add(egc, domid, disk, aodev, NULL, NULL); - } - --static int libxl__device_disk_from_xs_be(libxl__gc *gc, -- const char *be_path, -+static int libxl__device_disk_from_xenstore(libxl__gc *gc, -+ const char *libxl_path, - libxl_device_disk *disk) - { - libxl_ctx *ctx = libxl__gc_owner(gc); -@@ -2653,15 +2653,27 @@ static int libxl__device_disk_from_xs_be - - libxl_device_disk_init(disk); - -- rc = sscanf(be_path, "/local/domain/%d/", &disk->backend_domid); -+ const char *backend_path; -+ rc = libxl__xs_read_checked(gc, XBT_NULL, -+ GCSPRINTF("%s/backend", libxl_path), -+ &backend_path); -+ if (rc) goto out; -+ -+ if (!backend_path) { -+ LOG(ERROR, "disk %s does not exist (no backend path", libxl_path); -+ rc = ERROR_FAIL; -+ goto out; -+ } -+ -+ rc = sscanf(backend_path, "/local/domain/%d/", &disk->backend_domid); - if (rc != 1) { -- LOG(ERROR, "Unable to fetch device backend domid from %s", be_path); -+ LOG(ERROR, "Unable to fetch device backend domid from %s", backend_path); - goto cleanup; - } - - /* "params" may not be present; but everything else must be. 
*/ - tmp = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/params", be_path), &len); -+ GCSPRINTF("%s/params", libxl_path), &len); - if (tmp && strchr(tmp, ':')) { - disk->pdev_path = strdup(strchr(tmp, ':') + 1); - free(tmp); -@@ -2671,31 +2683,31 @@ static int libxl__device_disk_from_xs_be - - - tmp = libxl__xs_read(gc, XBT_NULL, -- libxl__sprintf(gc, "%s/type", be_path)); -+ GCSPRINTF("%s/type", libxl_path)); - if (!tmp) { -- LOG(ERROR, "Missing xenstore node %s/type", be_path); -+ LOG(ERROR, "Missing xenstore node %s/type", libxl_path); - goto cleanup; - } - libxl_string_to_backend(ctx, tmp, &(disk->backend)); - - disk->vdev = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/dev", be_path), &len); -+ GCSPRINTF("%s/dev", libxl_path), &len); - if (!disk->vdev) { -- LOG(ERROR, "Missing xenstore node %s/dev", be_path); -+ LOG(ERROR, "Missing xenstore node %s/dev", libxl_path); - goto cleanup; - } - - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf -- (gc, "%s/removable", be_path)); -+ (gc, "%s/removable", libxl_path)); - if (!tmp) { -- LOG(ERROR, "Missing xenstore node %s/removable", be_path); -+ LOG(ERROR, "Missing xenstore node %s/removable", libxl_path); - goto cleanup; - } - disk->removable = atoi(tmp); - -- tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/mode", be_path)); -+ tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/mode", libxl_path)); - if (!tmp) { -- LOG(ERROR, "Missing xenstore node %s/mode", be_path); -+ LOG(ERROR, "Missing xenstore node %s/mode", libxl_path); - goto cleanup; - } - if (!strcmp(tmp, "w")) -@@ -2704,9 +2716,9 @@ static int libxl__device_disk_from_xs_be - disk->readwrite = 0; - - tmp = libxl__xs_read(gc, XBT_NULL, -- libxl__sprintf(gc, "%s/device-type", be_path)); -+ GCSPRINTF("%s/device-type", libxl_path)); - if (!tmp) { -- LOG(ERROR, "Missing xenstore node %s/device-type", be_path); -+ LOG(ERROR, "Missing xenstore node %s/device-type", libxl_path); - goto cleanup; - } - disk->is_cdrom = !strcmp(tmp, "cdrom"); -@@ -2715,15 +2727,17 @@ static int libxl__device_disk_from_xs_be - - return 0; - cleanup: -+ rc = ERROR_FAIL; -+ out: - libxl_device_disk_dispose(disk); -- return ERROR_FAIL; -+ return rc; - } - - int libxl_vdev_to_device_disk(libxl_ctx *ctx, uint32_t domid, - const char *vdev, libxl_device_disk *disk) - { - GC_INIT(ctx); -- char *dompath, *path; -+ char *dom_xl_path, *libxl_path; - int devid = libxl__device_disk_dev_number(vdev, NULL, NULL); - int rc = ERROR_FAIL; - -@@ -2732,39 +2746,34 @@ int libxl_vdev_to_device_disk(libxl_ctx - - libxl_device_disk_init(disk); - -- dompath = libxl__xs_get_dompath(gc, domid); -- if (!dompath) { -+ dom_xl_path = libxl__xs_libxl_path(gc, domid); -+ if (!dom_xl_path) { - goto out; - } -- path = libxl__xs_read(gc, XBT_NULL, -- libxl__sprintf(gc, "%s/device/vbd/%d/backend", -- dompath, devid)); -- if (!path) -- goto out; -+ libxl_path = GCSPRINTF("%s/device/vbd/%d", dom_xl_path, devid); - -- rc = libxl__device_disk_from_xs_be(gc, path, disk); -+ rc = libxl__device_disk_from_xenstore(gc, libxl_path, disk); - out: - GC_FREE; - return rc; - } - - --static int libxl__append_disk_list_of_type(libxl__gc *gc, -+static int libxl__append_disk_list(libxl__gc *gc, - uint32_t domid, -- const char *type, - libxl_device_disk **disks, - int *ndisks) - { -- char *be_path = NULL; -+ char *libxl_dir_path = NULL; - char **dir = NULL; - unsigned int n = 0; - libxl_device_disk *pdisk = NULL, *pdisk_end = NULL; - int rc=0; - int initial_disks = *ndisks; - -- be_path = libxl__sprintf(gc, "%s/backend/%s/%d", -- 
libxl__xs_get_dompath(gc, 0), type, domid); -- dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); -+ libxl_dir_path = GCSPRINTF("%s/device/vbd", -+ libxl__xs_libxl_path(gc, domid)); -+ dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); - if (dir && n) { - libxl_device_disk *tmp; - tmp = realloc(*disks, sizeof (libxl_device_disk) * (*ndisks + n)); -@@ -2775,10 +2784,9 @@ static int libxl__append_disk_list_of_ty - pdisk_end = *disks + initial_disks + n; - for (; pdisk < pdisk_end; pdisk++, dir++) { - const char *p; -- p = libxl__sprintf(gc, "%s/%s", be_path, *dir); -- if ((rc=libxl__device_disk_from_xs_be(gc, p, pdisk))) -+ p = GCSPRINTF("%s/%s", libxl_dir_path, *dir); -+ if ((rc=libxl__device_disk_from_xenstore(gc, p, pdisk))) - goto out; -- pdisk->backend_domid = 0; - *ndisks += 1; - } - } -@@ -2794,13 +2802,7 @@ libxl_device_disk *libxl_device_disk_lis - - *num = 0; - -- rc = libxl__append_disk_list_of_type(gc, domid, "vbd", &disks, num); -- if (rc) goto out_err; -- -- rc = libxl__append_disk_list_of_type(gc, domid, "tap", &disks, num); -- if (rc) goto out_err; -- -- rc = libxl__append_disk_list_of_type(gc, domid, "qdisk", &disks, num); -+ rc = libxl__append_disk_list(gc, domid, &disks, num); - if (rc) goto out_err; - - GC_FREE; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0008-libxl-Do-not-trust-backend-for-disk-in-getinfo.patch xen-4.6.5/debian/patches/xsa178-unstable-0008-libxl-Do-not-trust-backend-for-disk-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0008-libxl-Do-not-trust-backend-for-disk-in-getinfo.patch 2016-06-02 07:17:29.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0008-libxl-Do-not-trust-backend-for-disk-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -From 4e47263f25956f015ce8054c37473511c066a0b1 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 19:10:45 +0100 -Subject: [PATCH 08/21] libxl: Do not trust backend for disk in getinfo - -Do not read the frontend path out of the backend. We have it in our -hand. Likewise the guest (frontend) domid was one of our parameters (!) - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:14:10.691777996 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:17:25.707774990 +0200 -@@ -2852,9 +2852,8 @@ int libxl_device_disk_getinfo(libxl_ctx - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/ring-ref", fe_path)); - diskinfo->rref = val ? strtoul(val, NULL, 10) : -1; - diskinfo->frontend = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/frontend", diskinfo->backend), NULL); -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend-id", diskinfo->backend)); -- diskinfo->frontend_id = val ? 
strtoul(val, NULL, 10) : -1; -+ GCSPRINTF("%s/frontend", libxl_path), NULL); -+ diskinfo->frontend_id = domid; - - GC_FREE; - return 0; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0009-libxl-Do-not-trust-backend-for-cdrom-insert.patch xen-4.6.5/debian/patches/xsa178-unstable-0009-libxl-Do-not-trust-backend-for-cdrom-insert.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0009-libxl-Do-not-trust-backend-for-cdrom-insert.patch 2016-06-02 07:23:33.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0009-libxl-Do-not-trust-backend-for-cdrom-insert.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,91 +0,0 @@ -From 5399ab94a9224b4a826fd5c6a1b8b258292d1efd Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 19:13:17 +0100 -Subject: [PATCH 09/21] libxl: Do not trust backend for cdrom insert - -Use the /libxl path where appropriate. Rename `path' variable to -`be_path' to make sure we caught all the occurrences. - -Specifically, when checking that the device still exists, check the -`frontend' value in /libxl, rather than anything in the backend -directory. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:18:27.579774036 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:23:28.183769401 +0200 -@@ -2872,7 +2872,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, u - libxl_domain_config d_config; - int rc, dm_ver; - libxl__device device; -- const char *path, *libxl_path; -+ const char *be_path, *libxl_path; - char * tmp; - libxl__domain_userdata_lock *lock = NULL; - xs_transaction_t t = XBT_NULL; -@@ -2939,7 +2939,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, u - rc = libxl__device_from_disk(gc, domid, disk, &device); - if (rc) goto out; - -- path = libxl__device_backend_path(gc, &device); -+ be_path = libxl__device_backend_path(gc, &device); - libxl_path = libxl__device_libxl_path(gc, &device); - - insert = flexarray_make(gc, 4, 1); -@@ -2979,19 +2979,19 @@ int libxl_cdrom_insert(libxl_ctx *ctx, u - for (;;) { - rc = libxl__xs_transaction_start(gc, &t); - if (rc) goto out; -- /* Sanity check: make sure the backend exists before writing here */ -- tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/frontend", path)); -+ /* Sanity check: make sure the device exists before writing here */ -+ tmp = libxl__xs_read(gc, t, GCSPRINTF("%s/frontend", libxl_path)); - if (!tmp) - { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Internal error: %s does not exist", -- libxl__sprintf(gc, "%s/frontend", path)); -+ GCSPRINTF("%s/frontend", libxl_path)); - rc = ERROR_FAIL; - goto out; - } - - char **kvs = libxl__xs_kvs_of_flexarray(gc, empty, empty->count); - -- rc = libxl__xs_writev(gc, t, path, kvs); -+ rc = libxl__xs_writev(gc, t, be_path, kvs); - if (rc) goto out; - - rc = libxl__xs_writev(gc, t, libxl_path, kvs); -@@ -3015,12 +3015,12 @@ int libxl_cdrom_insert(libxl_ctx *ctx, u - for (;;) { - rc = libxl__xs_transaction_start(gc, &t); - if (rc) goto out; -- /* Sanity check: make sure the backend exists before writing here */ -- tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/frontend", path)); -+ /* Sanity check: make sure the device exists before writing here */ -+ tmp = libxl__xs_read(gc, t, GCSPRINTF("%s/frontend", libxl_path)); - if (!tmp) - { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Internal error: %s 
does not exist", -- libxl__sprintf(gc, "%s/frontend", path)); -+ GCSPRINTF("%s/frontend", libxl_path)); - rc = ERROR_FAIL; - goto out; - } -@@ -3030,7 +3030,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, u - - char **kvs = libxl__xs_kvs_of_flexarray(gc, insert, insert->count); - -- rc = libxl__xs_writev(gc, t, path, kvs); -+ rc = libxl__xs_writev(gc, t, be_path, kvs); - if (rc) goto out; - - rc = libxl__xs_writev(gc, t, libxl_path, kvs); diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0010-libxl-Do-not-trust-backend-for-channel-in-getinfo.patch xen-4.6.5/debian/patches/xsa178-unstable-0010-libxl-Do-not-trust-backend-for-channel-in-getinfo.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0010-libxl-Do-not-trust-backend-for-channel-in-getinfo.patch 2016-06-02 07:24:01.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0010-libxl-Do-not-trust-backend-for-channel-in-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -From b2362b04e2d5fbd1a39019adf9e7e5f85cbdf2e1 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 15:57:10 +0100 -Subject: [PATCH 10/21] libxl: Do not trust backend for channel in getinfo - -Do not read the frontend path out of the backend. We have it in our -hand. Likewise the guest (frontend) domid was one of our parameters (!) - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index ded2040..05f3ba1 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -4065,12 +4065,8 @@ int libxl_device_channel_getinfo(libxl_ctx *ctx, uint32_t domid, - - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", fe_path)); - channelinfo->state = val ? strtoul(val, NULL, 10) : -1; -- channelinfo->frontend = xs_read(ctx->xsh, XBT_NULL, -- GCSPRINTF("%s/frontend", -- channelinfo->backend), NULL); -- val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/frontend-id", -- channelinfo->backend)); -- channelinfo->frontend_id = val ? strtoul(val, NULL, 10) : -1; -+ channelinfo->frontend = libxl__strdup(NOGC, fe_path); -+ channelinfo->frontend_id = domid; - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/ring-ref", fe_path)); - channelinfo->rref = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/port", fe_path)); --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0011-libxl-Rename-libxl__device_-nic-channel-_from_xs_be-.patch xen-4.6.5/debian/patches/xsa178-unstable-0011-libxl-Rename-libxl__device_-nic-channel-_from_xs_be-.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0011-libxl-Rename-libxl__device_-nic-channel-_from_xs_be-.patch 2016-06-02 07:27:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0011-libxl-Rename-libxl__device_-nic-channel-_from_xs_be-.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,84 +0,0 @@ -From 47ac7a8558135553ebb190fbfef3438ce16c3581 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 16:18:36 +0100 -Subject: [PATCH 11/21] libxl: Rename libxl__device_{nic,channel}_from_xs_be to - _from_xenstore - -We are going to change these functions to expect, and be passed, a -/libxl path. So it is wrong that they are called _from_xs_be. - -Neither function reads anything which isn't found in both places, so -we can and will change the call sites later. 
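The direction of this series, reading device properties from the toolstack-owned /libxl subtree instead of the backend directory that a (possibly malicious) backend domain can rewrite, can be sketched with the public libxenstore API. A minimal sketch, not code from the patch: the helper name read_vif_key and the domid/devid values are illustrative.

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenstore.h>

    /* Read a vif property for (domid, devid) from /libxl, which only
     * the toolstack may write, instead of the backend directory,
     * which the backend domain controls. */
    static char *read_vif_key(struct xs_handle *xsh, int domid,
                              int devid, const char *key)
    {
        char path[128];
        unsigned int len;

        snprintf(path, sizeof(path), "/libxl/%d/device/vif/%d/%s",
                 domid, devid, key);
        return xs_read(xsh, XBT_NULL, path, &len);   /* caller frees */
    }

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);
        if (!xsh)
            return 1;
        char *mac = read_vif_key(xsh, 5, 0, "mac");  /* example ids */
        printf("mac: %s\n", mac ? mac : "(unset)");
        free(mac);
        xs_close(xsh);
        return 0;
    }

Both subtrees carry the same keys for these devices, which is what makes the later call-site switch safe.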
- -The only remaining function in libxl called *_from_xs_be relates to -PCI devices, for which the backend domain is hardcoded to 0 throughout -the libxl_pci.c. - -No functional change. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:25:34.379767455 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:26:56.367766191 +0200 -@@ -3470,7 +3470,7 @@ out: - return; - } - --static int libxl__device_nic_from_xs_be(libxl__gc *gc, -+static int libxl__device_nic_from_xenstore(libxl__gc *gc, - const char *be_path, - libxl_device_nic *nic) - { -@@ -3533,7 +3533,7 @@ int libxl_devid_to_device_nic(libxl_ctx - if (!path) - goto out; - -- rc = libxl__device_nic_from_xs_be(gc, path, nic); -+ rc = libxl__device_nic_from_xenstore(gc, path, nic); - if (rc) goto out; - - rc = 0; -@@ -3568,7 +3568,7 @@ static int libxl__append_nic_list_of_typ - for (; pnic < pnic_end; pnic++, dir++) { - const char *p; - p = libxl__sprintf(gc, "%s/%s", be_path, *dir); -- rc = libxl__device_nic_from_xs_be(gc, p, pnic); -+ rc = libxl__device_nic_from_xenstore(gc, p, pnic); - if (rc) goto out; - pnic->backend_domid = 0; - } -@@ -3818,7 +3818,7 @@ int libxl__init_console_from_channel(lib - return 0; - } - --static int libxl__device_channel_from_xs_be(libxl__gc *gc, -+static int libxl__device_channel_from_xenstore(libxl__gc *gc, - const char *be_path, - libxl_device_channel *channel) - { -@@ -3827,7 +3827,7 @@ static int libxl__device_channel_from_xs - - libxl_device_channel_init(channel); - -- /* READ_BACKEND is from libxl__device_nic_from_xs_be above */ -+ /* READ_BACKEND is from libxl__device_nic_from_xenstore above */ - channel->name = READ_BACKEND(NOGC, "name"); - tmp = READ_BACKEND(gc, "connection"); - if (!strcmp(tmp, "pty")) { -@@ -3882,7 +3882,7 @@ static int libxl__append_channel_list(li - } - *channels = tmp; - next = *channels + *nchannels + devid; -- rc = libxl__device_channel_from_xs_be(gc, be_path, next); -+ rc = libxl__device_channel_from_xenstore(gc, be_path, next); - if (rc) goto out; - next->devid = devid; - devid++; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0012-libxl-Rename-READ_BACKEND-to-READ_LIBXLDEV.patch xen-4.6.5/debian/patches/xsa178-unstable-0012-libxl-Rename-READ_BACKEND-to-READ_LIBXLDEV.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0012-libxl-Rename-READ_BACKEND-to-READ_LIBXLDEV.patch 2016-06-02 07:30:36.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0012-libxl-Rename-READ_BACKEND-to-READ_LIBXLDEV.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -From 729ba26c1180288fd93585af4328482e60babf2a Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 16:07:02 +0100 -Subject: [PATCH 12/21] libxl: Rename READ_BACKEND to READ_LIBXLDEV - -We are going to want to change all the functions that use READ_BACKEND -to get untrustworthy information from the backend, to use trustworthy -information from /libxl. - -This will involve replacing READ_BACKEND, which reads from be_path, -with a similar macro READ_LIBXLDEV, which reads from libxl_path. - -The macro name change generates a lot of clutter in the diff. So we -break it out into this separate patch. Here, we rename the macro, but -the implementation does not really match the new name. 
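The macro being renamed is a GCC statement expression: it evaluates to the value it read, and on failure sets rc and jumps to the enclosing function's out label. A self-contained sketch of the idiom, with read_checked() standing in for libxl__xs_read_checked() and a fixed "root" prefix in place of be_path/libxl_path:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for libxl__xs_read_checked(): NULL means error. */
    static char *read_checked(const char *path)
    {
        return strcmp(path, "root/mac") ? NULL
                                        : strdup("00:16:3e:00:00:01");
    }

    /* Statement expression: yields the value, or diverts through the
     * caller's error path, like READ_BACKEND/READ_LIBXLDEV. */
    #define READ_KEY(subpath) ({                       \
            char *v_ = read_checked("root/" subpath);  \
            if (!v_) { rc = -1; goto out; }            \
            v_;                                        \
        })

    static int parse_device(void)
    {
        int rc = 0;
        char *mac = READ_KEY("mac");   /* expands in place */
        printf("mac = %s\n", mac);
        free(mac);
    out:
        return rc;
    }

    int main(void) { return parse_device(); }

Because the path prefix is baked into the macro definition, switching every reader from the backend tree to /libxl later needs only a one-line change at the definition.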
- -So, another way to look at this, is that we have transformed the bug: - * All of the backends use READ_BACKEND, which is unsafe -into the new bug: - * READ_LIBXLDEV actually reads be_path, which is unsafe. - -There is no functional change as yet. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:28:01.023765194 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:30:33.195762848 +0200 -@@ -21,8 +21,8 @@ - #define PAGE_TO_MEMKB(pages) ((pages) * 4) - #define BACKEND_STRING_SIZE 5 - --/* Utility to read backend xenstore keys */ --#define READ_BACKEND(tgc, subpath) ({ \ -+/* Utility to read /libxl or backend xenstore keys, from be_path */ -+#define READ_LIBXLDEV(tgc, subpath) ({ \ - rc = libxl__xs_read_checked(tgc, XBT_NULL, \ - GCSPRINTF("%s/" subpath, be_path), \ - &tmp); \ -@@ -3479,7 +3479,7 @@ static int libxl__device_nic_from_xensto - - libxl_device_nic_init(nic); - -- tmp = READ_BACKEND(gc, "handle"); -+ tmp = READ_LIBXLDEV(gc, "handle"); - if (tmp) - nic->devid = atoi(tmp); - else -@@ -3487,7 +3487,7 @@ static int libxl__device_nic_from_xensto - - /* nic->mtu = */ - -- tmp = READ_BACKEND(gc, "mac"); -+ tmp = READ_LIBXLDEV(gc, "mac"); - if (tmp) { - rc = libxl__parse_mac(tmp, nic->mac); - if (rc) goto out; -@@ -3495,12 +3495,12 @@ static int libxl__device_nic_from_xensto - memset(nic->mac, 0, sizeof(nic->mac)); - } - -- nic->ip = READ_BACKEND(NOGC, "ip"); -- nic->bridge = READ_BACKEND(NOGC, "bridge"); -- nic->script = READ_BACKEND(NOGC, "script"); -+ nic->ip = READ_LIBXLDEV(NOGC, "ip"); -+ nic->bridge = READ_LIBXLDEV(NOGC, "bridge"); -+ nic->script = READ_LIBXLDEV(NOGC, "script"); - - /* vif_ioemu nics use the same xenstore entries as vif interfaces */ -- tmp = READ_BACKEND(gc, "type"); -+ tmp = READ_LIBXLDEV(gc, "type"); - if (tmp) { - rc = libxl_nic_type_from_string(tmp, &nic->nictype); - if (rc) goto out; -@@ -3827,14 +3827,14 @@ static int libxl__device_channel_from_xe - - libxl_device_channel_init(channel); - -- /* READ_BACKEND is from libxl__device_nic_from_xenstore above */ -- channel->name = READ_BACKEND(NOGC, "name"); -- tmp = READ_BACKEND(gc, "connection"); -+ /* READ_LIBXLDEV is from libxl__device_nic_from_xenstore above */ -+ channel->name = READ_LIBXLDEV(NOGC, "name"); -+ tmp = READ_LIBXLDEV(gc, "connection"); - if (!strcmp(tmp, "pty")) { - channel->connection = LIBXL_CHANNEL_CONNECTION_PTY; - } else if (!strcmp(tmp, "socket")) { - channel->connection = LIBXL_CHANNEL_CONNECTION_SOCKET; -- channel->u.socket.path = READ_BACKEND(NOGC, "path"); -+ channel->u.socket.path = READ_LIBXLDEV(NOGC, "path"); - } else { - rc = ERROR_INVAL; - goto out; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0013-libxl-Have-READ_LIBXLDEV-use-libxl_path-rather-than-.patch xen-4.6.5/debian/patches/xsa178-unstable-0013-libxl-Have-READ_LIBXLDEV-use-libxl_path-rather-than-.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0013-libxl-Have-READ_LIBXLDEV-use-libxl_path-rather-than-.patch 2016-06-02 07:30:43.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0013-libxl-Have-READ_LIBXLDEV-use-libxl_path-rather-than-.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -From 8e37e743331110b4fec5928689d74dceda5eb608 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 
15:40:18 +0100 -Subject: [PATCH 13/21] libxl: Have READ_LIBXLDEV use libxl_path rather than - be_path - -Fix the just-introduced bug in this macro: now it reads the -trustworthy libxl_path. Change the variable name in the two functions -(nic and channel) which use it. - -Shuffling the bump in the carpet along, we now introduce three new -bugs: the three call sites pass a backend path where a frontend path -is expected. - -No functional change. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index b5578cb..d7d7775 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -21,10 +21,10 @@ - #define PAGE_TO_MEMKB(pages) ((pages) * 4) - #define BACKEND_STRING_SIZE 5 - --/* Utility to read /libxl or backend xenstore keys, from be_path */ -+/* Utility to read /libxl xenstore keys, from libxl_path */ - #define READ_LIBXLDEV(tgc, subpath) ({ \ - rc = libxl__xs_read_checked(tgc, XBT_NULL, \ -- GCSPRINTF("%s/" subpath, be_path), \ -+ GCSPRINTF("%s/" subpath, libxl_path), \ - &tmp); \ - if (rc) goto out; \ - (char*)tmp; \ -@@ -3586,7 +3586,7 @@ out: - } - - static int libxl__device_nic_from_xenstore(libxl__gc *gc, -- const char *be_path, -+ const char *libxl_path, - libxl_device_nic *nic) - { - const char *tmp; -@@ -3935,7 +3935,7 @@ int libxl__init_console_from_channel(libxl__gc *gc, - } - - static int libxl__device_channel_from_xenstore(libxl__gc *gc, -- const char *be_path, -+ const char *libxl_path, - libxl_device_channel *channel) - { - const char *tmp; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0014-libxl-Do-not-trust-backend-in-nic-getinfo.patch xen-4.6.5/debian/patches/xsa178-unstable-0014-libxl-Do-not-trust-backend-in-nic-getinfo.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0014-libxl-Do-not-trust-backend-in-nic-getinfo.patch 2016-06-02 07:33:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0014-libxl-Do-not-trust-backend-in-nic-getinfo.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -From 9eb1f76bc67f7cf5a9fb86f3aaf01fe2932de1fa Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Tue, 3 May 2016 16:35:21 +0100 -Subject: [PATCH 14/21] libxl: Do not trust backend in nic getinfo - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:31:38.995761834 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:33:17.307760318 +0200 -@@ -3636,10 +3636,8 @@ int libxl_device_nic_getinfo(libxl_ctx * - nicinfo->rref_tx = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/rx-ring-ref", nicpath)); - nicinfo->rref_rx = val ? strtoul(val, NULL, 10) : -1; -- nicinfo->frontend = xs_read(ctx->xsh, XBT_NULL, -- libxl__sprintf(gc, "%s/frontend", nicinfo->backend), NULL); -- val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend-id", nicinfo->backend)); -- nicinfo->frontend_id = val ? 
strtoul(val, NULL, 10) : -1; -+ nicinfo->frontend = libxl__strdup(NOGC, nicpath); -+ nicinfo->frontend_id = domid; - - rc = 0; - out: diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0015-libxl-Do-not-trust-backend-for-nic-in-devid_to_devic.patch xen-4.6.5/debian/patches/xsa178-unstable-0015-libxl-Do-not-trust-backend-for-nic-in-devid_to_devic.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0015-libxl-Do-not-trust-backend-for-nic-in-devid_to_devic.patch 2016-06-02 07:33:43.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0015-libxl-Do-not-trust-backend-for-nic-in-devid_to_devic.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -From df1c2b3e2b3412c851a7ecaa056d1653d2f9f650 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 16:20:05 +0100 -Subject: [PATCH 15/21] libxl: Do not trust backend for nic in devid_to_device - -libxl_devid_to_device_nic should read the information it needs from -the /libxl/device path, not the backend. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index 0f6648a..8cc9114 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3635,7 +3635,7 @@ int libxl_devid_to_device_nic(libxl_ctx *ctx, uint32_t domid, - int devid, libxl_device_nic *nic) - { - GC_INIT(ctx); -- char *libxl_dom_path, *path; -+ char *libxl_dom_path, *libxl_path; - int rc = ERROR_FAIL; - - libxl_device_nic_init(nic); -@@ -3643,13 +3643,9 @@ int libxl_devid_to_device_nic(libxl_ctx *ctx, uint32_t domid, - if (!libxl_dom_path) - goto out; - -- path = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/device/vif/%d/backend", libxl_dom_path, -- devid)); -- if (!path) -- goto out; -+ libxl_path = GCSPRINTF("%s/device/vif/%d", libxl_dom_path, devid); - -- rc = libxl__device_nic_from_xenstore(gc, path, nic); -+ rc = libxl__device_nic_from_xenstore(gc, libxl_path, nic); - if (rc) goto out; - - rc = 0; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0016-libxl-Do-not-trust-backend-for-nic-in-list.patch xen-4.6.5/debian/patches/xsa178-unstable-0016-libxl-Do-not-trust-backend-for-nic-in-list.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0016-libxl-Do-not-trust-backend-for-nic-in-list.patch 2016-06-02 07:38:41.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0016-libxl-Do-not-trust-backend-for-nic-in-list.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,77 +0,0 @@ -From ee0b02e920847b5ff198f0d43968cda9c544c983 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 16:23:57 +0100 -Subject: [PATCH 16/21] libxl: Do not trust backend for nic in list - -libxl_device_nic_list should use the /libxl path to search for -devices, and for obtaining the device information. - -The "type" parameter was always "vif". Abolish it. (In any case, -paths in /libxl/device are named after the frontend type which is -constant, not the backend type which might in future vary.) - -Abolish a redundant store to pnic->backend_domid. Before this commit, -that store was not needed because libxl_device_nic_init (called by -libxl__device_nic_from_xenstore) would zero it. Now it overwrites the -correct backend domid with zero; so remove it. - -This is part of XSA-178. 
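The listing pattern adopted here reduces to: enumerate the children of /libxl/<domid>/device/vif, then grow the result array once per directory read. A sketch with stand-in names (nic_t for libxl_device_nic, an illustrative domid of 5):

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenstore.h>

    typedef struct { int devid; } nic_t;     /* stand-in type */

    static int append_nic_list(struct xs_handle *xsh, int domid,
                               nic_t **nics, int *nnics)
    {
        char path[64];
        unsigned int n, i;

        snprintf(path, sizeof(path), "/libxl/%d/device/vif", domid);
        char **dir = xs_directory(xsh, XBT_NULL, path, &n);
        if (!dir || !n) {
            free(dir);
            return 0;                        /* nothing to add */
        }

        nic_t *tmp = realloc(*nics, sizeof(nic_t) * (*nnics + n));
        if (!tmp) {
            free(dir);
            return -1;
        }
        *nics = tmp;

        for (i = 0; i < n; i++)              /* children are devids */
            (*nics)[*nnics + i].devid = atoi(dir[i]);
        *nnics += n;
        free(dir);                           /* one block from xs_directory */
        return 0;
    }

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);
        nic_t *nics = NULL;
        int nnics = 0;

        if (!xsh)
            return 1;
        if (append_nic_list(xsh, 5, &nics, &nnics) == 0)
            printf("%d vif(s)\n", nnics);
        free(nics);
        xs_close(xsh);
        return 0;
    }

Reading from /libxl also means libxl__device_nic_from_xenstore fills in the correct backend_domid itself, which is why the per-entry zeroing store is dropped.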
- -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 16 +++++++--------- - 1 file changed, 7 insertions(+), 9 deletions(-) - -Index: xen-4.6.0/tools/libxl/libxl.c -=================================================================== ---- xen-4.6.0.orig/tools/libxl/libxl.c 2016-06-02 09:34:37.063759088 +0200 -+++ xen-4.6.0/tools/libxl/libxl.c 2016-06-02 09:38:38.019755373 +0200 -@@ -3538,21 +3538,20 @@ out: - return rc; - } - --static int libxl__append_nic_list_of_type(libxl__gc *gc, -+static int libxl__append_nic_list(libxl__gc *gc, - uint32_t domid, -- const char *type, - libxl_device_nic **nics, - int *nnics) - { -- char *be_path = NULL; -+ char *libxl_dir_path = NULL; - char **dir = NULL; - unsigned int n = 0; - libxl_device_nic *pnic = NULL, *pnic_end = NULL; - int rc; - -- be_path = libxl__sprintf(gc, "%s/backend/%s/%d", -- libxl__xs_get_dompath(gc, 0), type, domid); -- dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); -+ libxl_dir_path = GCSPRINTF("%s/device/vif", -+ libxl__xs_libxl_path(gc, domid)); -+ dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); - if (dir && n) { - libxl_device_nic *tmp; - tmp = realloc(*nics, sizeof (libxl_device_nic) * (*nnics + n)); -@@ -3563,10 +3562,9 @@ static int libxl__append_nic_list_of_typ - pnic_end = *nics + *nnics + n; - for (; pnic < pnic_end; pnic++, dir++) { - const char *p; -- p = libxl__sprintf(gc, "%s/%s", be_path, *dir); -+ p = GCSPRINTF("%s/%s", libxl_dir_path, *dir); - rc = libxl__device_nic_from_xenstore(gc, p, pnic); - if (rc) goto out; -- pnic->backend_domid = 0; - } - *nnics += n; - } -@@ -3584,7 +3582,7 @@ libxl_device_nic *libxl_device_nic_list( - - *num = 0; - -- rc = libxl__append_nic_list_of_type(gc, domid, "vif", &nics, num); -+ rc = libxl__append_nic_list(gc, domid, &nics, num); - if (rc) goto out_err; - - GC_FREE; diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0017-libxl-Do-not-trust-backend-in-channel-list.patch xen-4.6.5/debian/patches/xsa178-unstable-0017-libxl-Do-not-trust-backend-in-channel-list.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0017-libxl-Do-not-trust-backend-in-channel-list.patch 2016-06-02 07:38:53.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0017-libxl-Do-not-trust-backend-in-channel-list.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -From 69f618db0f3c4b31e2da51793a60e53a7e6706e1 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 4 May 2016 16:59:38 +0100 -Subject: [PATCH 17/21] libxl: Do not trust backend in channel list - -Read the name from /libxl/device. Pass the /libxl path to -libxl__device_channel_from_xenstore. - -This removes the final route by which READ_LIBXLDEV might receive a -backend path. - -This is part of XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- -v2: Remove be_path variable which is now no longer used. 
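The "consoles with names" rule is a plain filter over the /libxl console directory: a child without a name node is an ordinary console and is skipped. A sketch, with an illustrative domid and a printf in place of the real list-append logic:

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenstore.h>

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);
        unsigned int n = 0, i, len;

        if (!xsh)
            return 1;

        const char *dir_path = "/libxl/5/device/console";
        char **dir = xs_directory(xsh, XBT_NULL, dir_path, &n);

        for (i = 0; dir && i < n; i++) {
            char path[128];
            snprintf(path, sizeof(path), "%s/%s/name", dir_path, dir[i]);
            char *name = xs_read(xsh, XBT_NULL, path, &len);
            if (!name)
                continue;        /* nameless console: not a channel */
            printf("channel %s -> %s\n", dir[i], name);
            free(name);
        }
        free(dir);
        xs_close(xsh);
        return 0;
    }

Reading name from the /libxl node rather than from the backend directory is what removes the final route by which READ_LIBXLDEV could see a backend path.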
---- - tools/libxl/libxl.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index daa3417..bfd1ff7 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -3958,7 +3958,7 @@ static int libxl__append_channel_list(libxl__gc *gc, - libxl_device_channel **channels, - int *nchannels) - { -- char *libxl_dir_path = NULL, *be_path = NULL; -+ char *libxl_dir_path = NULL; - char **dir = NULL; - unsigned int n = 0, devid = 0; - libxl_device_channel *next = NULL; -@@ -3975,10 +3975,7 @@ static int libxl__append_channel_list(libxl__gc *gc, - libxl_device_channel *tmp; - - libxl_path = GCSPRINTF("%s/%s", libxl_dir_path, dir[i]); -- be_path = libxl__xs_read(gc, XBT_NULL, -- GCSPRINTF("%s/backend", libxl_path)); -- if (!be_path) continue; -- name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", be_path)); -+ name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", libxl_path)); - /* 'channels' are consoles with names, so ignore all consoles - without names */ - if (!name) continue; -@@ -3990,7 +3987,7 @@ static int libxl__append_channel_list(libxl__gc *gc, - } - *channels = tmp; - next = *channels + *nchannels + devid; -- rc = libxl__device_channel_from_xenstore(gc, be_path, next); -+ rc = libxl__device_channel_from_xenstore(gc, libxl_path, next); - if (rc) goto out; - next->devid = devid; - devid++; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0018-libxl-Cleanup-use-libxl__backendpath_parse_domid-in-.patch xen-4.6.5/debian/patches/xsa178-unstable-0018-libxl-Cleanup-use-libxl__backendpath_parse_domid-in-.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0018-libxl-Cleanup-use-libxl__backendpath_parse_domid-in-.patch 2016-06-07 14:24:25.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0018-libxl-Cleanup-use-libxl__backendpath_parse_domid-in-.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -From 5225dbb47ce646cd0cdc1aace8b2c1581bdff7ff Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Fri, 29 Apr 2016 16:08:19 +0100 -Subject: [PATCH 2/3] libxl: Cleanup: use libxl__backendpath_parse_domid in - libxl__device_disk_from_xs_be - -Rather than an open-coded sscanf. No functional change with correct -input. - -This is a followup to XSA-175 and XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu ---- - tools/libxl/libxl.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index bfd7df9..ac4a668 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -2665,10 +2665,10 @@ static int libxl__device_disk_from_xenstore(libxl__gc *gc, - goto out; - } - -- rc = sscanf(backend_path, "/local/domain/%d/", &disk->backend_domid); -- if (rc != 1) { -+ rc = libxl__backendpath_parse_domid(gc, backend_path, &disk->backend_domid); -+ if (rc) { - LOG(ERROR, "Unable to fetch device backend domid from %s", backend_path); -- goto cleanup; -+ goto out; - } - - /* "params" may not be present; but everything else must be. 
*/ --- -1.9.1 - diff -Nru xen-4.6.0/debian/patches/xsa178-unstable-0019-libxl-Fix-NULL-pointer-due-to-XSA-178-fix-wrong-XS-n.patch xen-4.6.5/debian/patches/xsa178-unstable-0019-libxl-Fix-NULL-pointer-due-to-XSA-178-fix-wrong-XS-n.patch --- xen-4.6.0/debian/patches/xsa178-unstable-0019-libxl-Fix-NULL-pointer-due-to-XSA-178-fix-wrong-XS-n.patch 2016-06-09 13:14:01.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa178-unstable-0019-libxl-Fix-NULL-pointer-due-to-XSA-178-fix-wrong-XS-n.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -From 8b7a356409023f60f80e9f4b00bba16ad56cd77b Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Wed, 8 Jun 2016 15:42:19 +0100 -Subject: [PATCH] libxl: Fix NULL pointer due to XSA-178 fix wrong XS nodename - -In "libxl: Do not trust backend for disk eject vdev" (c69871a2fb26 on -xen.git#staging) we changed libxl_evenable_disk_eject to read the -device vdev out of xenstore from the /libxl path, rather than the -backend path, and to read it during setup rather than on each event. - -However, the patch has a mistake: - - GCSPRINTF("%s/dev", backend), NULL); - + GCSPRINTF("%s/vdev", libxl_path), &configured_vdev); - ^ -Spot the extra "v". This causes configured_vdev always to be NULL. -configured_vdev is passed to [libxl__]strdup. - -In Xen 4.6 and later libxl__strdup is used and tolerates NULL. -evg->vdev is set to NULL. This propagates to the `vdev' field in the -generated event. This may or may not cause further trouble, depending -on the calling application. In our osstest test cases it does not -cause any trouble, so the bug goes undetected. - -In Xen 4.5 and earlier, the strdup does not tolerate NULL, and libxl -crashes immediately. This has been detected by osstest as a -regression in Xen 4.5. - -IMO this patch should be applied immediately to - xen.git#staging-4.5 (to check that it fixes the osstest regression) - xen.git#staging (to check that it does not break master - -Subject to passes, it should then be propagated to all supported -stable trees and also be mentioned in an update to XSA-178. - -Signed-off-by: Ian Jackson -Reviewed-by: Wei Liu -CC: security@xenproject.org -CC: Jan Beulich -CC: Wei Liu -(cherry picked from commit 62b4d4769ca39fd5263da20d786a7b9a80a22d9a) -Signed-off-by: Stefan Bader ---- - tools/libxl/libxl.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index f22e01e..d60fe5d 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -1423,7 +1423,7 @@ int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t guest_domid, - - const char *configured_vdev; - rc = libxl__xs_read_checked(gc, XBT_NULL, -- GCSPRINTF("%s/vdev", libxl_path), &configured_vdev); -+ GCSPRINTF("%s/dev", libxl_path), &configured_vdev); - if (rc) goto out; - - evg->vdev = libxl__strdup(NOGC, configured_vdev); --- -1.9.1 - diff -Nru xen-4.6.0/debian/patches/xsa181.patch xen-4.6.5/debian/patches/xsa181.patch --- xen-4.6.0/debian/patches/xsa181.patch 2016-06-07 14:24:44.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa181.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -From ee488e2133e581967d13d5287d7bd654e9b2e2a6 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 2 Jun 2016 14:19:00 +0100 -Subject: [PATCH] xen/arm: Don't free p2m->root in p2m_teardown() before it has - been allocated - -If p2m_init() didn't complete successfully, (e.g. due to VMID -exhaustion), p2m_teardown() is called and unconditionally tries to free -p2m->root before it has been allocated. 
free_domheap_pages() doesn't -tolerate NULL pointers. - -This is XSA-181 - -Reported-by: Aaron Cornelius -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Julien Grall ---- - xen/arch/arm/p2m.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c -index 838d004..6a19c57 100644 ---- a/xen/arch/arm/p2m.c -+++ b/xen/arch/arm/p2m.c -@@ -1408,7 +1408,8 @@ void p2m_teardown(struct domain *d) - while ( (pg = page_list_remove_head(&p2m->pages)) ) - free_domheap_page(pg); - -- free_domheap_pages(p2m->root, P2M_ROOT_ORDER); -+ if ( p2m->root ) -+ free_domheap_pages(p2m->root, P2M_ROOT_ORDER); - - p2m->root = NULL; - --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa182-4.6.patch xen-4.6.5/debian/patches/xsa182-4.6.patch --- xen-4.6.0/debian/patches/xsa182-4.6.patch 2016-10-06 13:52:41.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa182-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,102 +0,0 @@ -From f48a75b0c10ac79b287ca2b580ecb9ea2f696607 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 11 Jul 2016 14:32:03 +0100 -Subject: [PATCH] x86/pv: Remove unsafe bits from the mod_l?_entry() fastpath - -All changes in writeability and cacheability must go through full -re-validation. - -Rework the logic as a whitelist, to make it clearer to follow. - -This is XSA-182 - -Reported-by: Jérémie Boutoille -Signed-off-by: Andrew Cooper -Reviewed-by: Tim Deegan ---- - xen/arch/x86/mm.c | 28 ++++++++++++++++------------ - xen/include/asm-x86/page.h | 1 + - 2 files changed, 17 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index daf02ab..8dd22b8 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1780,6 +1780,14 @@ static inline int update_intpte(intpte_t *p, - _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ - (_m), (_v), (_ad)) - -+/* -+ * PTE flags that a guest may change without re-validating the PTE. -+ * All other bits affect translation, caching, or Xen's safety. -+ */ -+#define FASTPATH_FLAG_WHITELIST \ -+ (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \ -+ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER) -+ - /* Update the L1 entry at pl1e to new value nl1e. */ - static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, - unsigned long gl1mfn, int preserve_ad, -@@ -1820,9 +1828,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, - return -EINVAL; - } - -- /* Fast path for identical mapping, r/w, presence, and cachability. */ -- if ( !l1e_has_changed(ol1e, nl1e, -- PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) ) -+ /* Fast path for sufficiently-similar mappings. */ -+ if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) ) - { - adjust_guest_l1e(nl1e, pt_dom); - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, -@@ -1904,11 +1911,8 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, - return -EINVAL; - } - -- /* Fast path for identical mapping and presence. */ -- if ( !l2e_has_changed(ol2e, nl2e, -- unlikely(opt_allow_superpage) -- ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT -- : _PAGE_PRESENT) ) -+ /* Fast path for sufficiently-similar mappings. */ -+ if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) ) - { - adjust_guest_l2e(nl2e, d); - if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) -@@ -1973,8 +1977,8 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, - return -EINVAL; - } - -- /* Fast path for identical mapping and presence. 
*/ -- if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) ) -+ /* Fast path for sufficiently-similar mappings. */ -+ if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) ) - { - adjust_guest_l3e(nl3e, d); - rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad); -@@ -2037,8 +2041,8 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - return -EINVAL; - } - -- /* Fast path for identical mapping and presence. */ -- if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) ) -+ /* Fast path for sufficiently-similar mappings. */ -+ if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) ) - { - adjust_guest_l4e(nl4e, d); - rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad); -diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h -index 66b611c..1a59ed8 100644 ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -311,6 +311,7 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - #define _PAGE_AVAIL2 _AC(0x800,U) - #define _PAGE_AVAIL _AC(0xE00,U) - #define _PAGE_PSE_PAT _AC(0x1000,U) -+#define _PAGE_AVAIL_HIGH (_AC(0x7ff, U) << 12) - #define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0) - /* non-architectural flags */ - #define _PAGE_PAGED 0x2000U --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa183-4.6.patch xen-4.6.5/debian/patches/xsa183-4.6.patch --- xen-4.6.0/debian/patches/xsa183-4.6.patch 2016-10-06 13:52:59.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa183-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,75 +0,0 @@ -From 777ebe30e81ab284f9b78392875fe884a593df35 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 15 Jun 2016 18:32:14 +0100 -Subject: [PATCH] x86/entry: Avoid SMAP violation in - compat_create_bounce_frame() - -A 32bit guest kernel might be running on user mappings. -compat_create_bounce_frame() must whitelist its guest accesses to avoid -risking a SMAP violation. - -For both variants of create_bounce_frame(), re-blacklist user accesses if -execution exits via an exception table redirection. - -This is XSA-183 / CVE-2016-6259 - -Signed-off-by: Andrew Cooper -Reviewed-by: George Dunlap -Reviewed-by: Jan Beulich ---- -v2: - * Include CLAC on the exit paths from compat_create_bounce_frame which occur - from faults attempting to load %fs - * Reposition ASM_STAC to avoid breaking the macro-op fusion of test/jz ---- - xen/arch/x86/x86_64/compat/entry.S | 3 +++ - xen/arch/x86/x86_64/entry.S | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 0e3db7c..1eaf4bb 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -350,6 +350,7 @@ ENTRY(compat_int80_direct_trap) - compat_create_bounce_frame: - ASSERT_INTERRUPTS_ENABLED - mov %fs,%edi -+ ASM_STAC - testb $2,UREGS_cs+8(%rsp) - jz 1f - /* Push new frame at registered guest-OS stack base. */ -@@ -403,6 +404,7 @@ UNLIKELY_START(nz, compat_bounce_failsafe) - movl %ds,%eax - .Lft12: movl %eax,%fs:0*4(%rsi) # DS - UNLIKELY_END(compat_bounce_failsafe) -+ ASM_CLAC - /* Rewrite our stack frame and return to guest-OS mode. */ - /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ - andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\ -@@ -448,6 +450,7 @@ compat_crash_page_fault_4: - addl $4,%esi - compat_crash_page_fault: - .Lft14: mov %edi,%fs -+ ASM_CLAC - movl %esi,%edi - call show_page_walk - jmp dom_crash_sync_extable -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 6e27508..0c2e63a 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -462,9 +462,11 @@ domain_crash_page_fault_16: - domain_crash_page_fault_8: - addq $8,%rsi - domain_crash_page_fault: -+ ASM_CLAC - movq %rsi,%rdi - call show_page_walk - ENTRY(dom_crash_sync_extable) -+ ASM_CLAC - # Get out of the guest-save area of the stack. - GET_STACK_BASE(%rax) - leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa185.patch xen-4.6.5/debian/patches/xsa185.patch --- xen-4.6.0/debian/patches/xsa185.patch 2016-10-06 13:53:12.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa185.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -From 30aba4992b18245c436f16df7326a16c01a51570 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 8 Aug 2016 10:58:12 +0100 -Subject: x86/32on64: don't allow recursive page tables from L3 - -L3 entries are special in PAE mode, and hence can't reasonably be used -for setting up recursive (and hence linear) page table mappings. Since -abuse is possible when the guest in fact gets run on 4-level page -tables, this needs to be excluded explicitly. - -This is XSA-185. - -Reported-by: Jérémie Boutoille -Reported-by: 栾尚聪(好风) -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper ---- - xen/arch/x86/mm.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 109b8be..69b8b8d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1122,7 +1122,9 @@ get_page_from_l3e( - - rc = get_page_and_type_from_pagenr( - l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1); -- if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) -+ if ( unlikely(rc == -EINVAL) && -+ !is_pv_32bit_domain(d) && -+ get_l3_linear_pagetable(l3e, pfn, d) ) - rc = 0; - - return rc; --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch xen-4.6.5/debian/patches/xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch --- xen-4.6.0/debian/patches/xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch 2016-10-06 16:18:20.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,148 +0,0 @@ -From: Andrew Cooper -Subject: x86/segment: Bounds check accesses to emulation ctxt->seg_reg[] - -HVM HAP codepaths have space for all segment registers in the seg_reg[] -cache (with x86_seg_none still risking an array overrun), while the shadow -codepaths only have space for the user segments. - -Range check the input segment of *_get_seg_reg() against the size of the array -used to cache the results, to avoid overruns in the case that the callers -don't filter their input suitably. - -Subsume the is_x86_user_segment(seg) checks from the shadow code, which were -an incomplete attempt at range checking, and are now superceeded. Make -hvm_get_seg_reg() static, as it is not used outside of shadow/common.c - -No functional change, but far easier to reason that no overflow is possible. 
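The shape of this fix is a range check against the size of the cache array, combined with the Linux-style ERR_PTR encoding so that a single pointer return carries either a valid entry or an error code. A self-contained sketch: the enum values are illustrative and -22 stands in for the real -X86EMUL_UNHANDLEABLE payload.

    #include <stdio.h>

    #define MAX_ERRNO 4095
    static inline void *ERR_PTR(long err) { return (void *)err; }
    static inline long PTR_ERR(const void *p) { return (long)p; }
    static inline int IS_ERR(const void *p)
    {
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    enum seg { SEG_CS, SEG_DS, SEG_SS, SEG_NONE, SEG_COUNT };

    /* Cache deliberately smaller than the enum range, mirroring the
     * shadow code's user-segments-only seg_reg[] array. */
    static int seg_reg[SEG_COUNT - 1];

    static int *get_seg_reg(int seg)
    {
        /* Range-check the possibly guest-influenced index before the
         * array access: the heart of the fix. */
        if (seg < 0 || seg >= (int)(sizeof(seg_reg) / sizeof(seg_reg[0])))
            return ERR_PTR(-22);
        return &seg_reg[seg];
    }

    int main(void)
    {
        int *r = get_seg_reg(SEG_NONE);      /* outside the cache */
        if (IS_ERR(r))
            printf("rejected: err=%ld\n", PTR_ERR(r));
        return 0;
    }

Callers passing a known in-range segment can keep using the returned pointer directly; only the generic paths pay for an IS_ERR() test, which matches the comment added above each helper.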
- -Reported-by: Andrew Cooper -Signed-off-by: Andrew Cooper -Acked-by: Tim Deegan -Acked-by: Jan Beulich - -Index: xen-4.6.0/xen/arch/x86/hvm/emulate.c -=================================================================== ---- xen-4.6.0.orig/xen/arch/x86/hvm/emulate.c 2016-10-06 18:18:12.839984594 +0200 -+++ xen-4.6.0/xen/arch/x86/hvm/emulate.c 2016-10-06 18:18:12.835984594 +0200 -@@ -517,6 +517,8 @@ static int hvmemul_virtual_to_linear( - ? 1 : 4096); - - reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); -+ if ( IS_ERR(reg) ) -+ return -PTR_ERR(reg); - - if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) ) - { -@@ -1347,6 +1349,10 @@ static int hvmemul_read_segment( - struct hvm_emulate_ctxt *hvmemul_ctxt = - container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); -+ -+ if ( IS_ERR(sreg) ) -+ return -PTR_ERR(sreg); -+ - memcpy(reg, sreg, sizeof(struct segment_register)); - return X86EMUL_OKAY; - } -@@ -1360,6 +1366,9 @@ static int hvmemul_write_segment( - container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); - -+ if ( IS_ERR(sreg) ) -+ return -PTR_ERR(sreg); -+ - memcpy(sreg, reg, sizeof(struct segment_register)); - __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty); - -@@ -1852,10 +1861,17 @@ void hvm_emulate_writeback( - } - } - -+/* -+ * Callers which pass a known in-range x86_segment can rely on the return -+ * pointer being valid. Other callers must explicitly check for errors. -+ */ - struct segment_register *hvmemul_get_seg_reg( - enum x86_segment seg, - struct hvm_emulate_ctxt *hvmemul_ctxt) - { -+ if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) ) -+ return ERR_PTR(-X86EMUL_UNHANDLEABLE); -+ - if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) ) - hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); - return &hvmemul_ctxt->seg_reg[seg]; -Index: xen-4.6.0/xen/arch/x86/mm/shadow/common.c -=================================================================== ---- xen-4.6.0.orig/xen/arch/x86/mm/shadow/common.c 2016-10-06 18:18:12.839984594 +0200 -+++ xen-4.6.0/xen/arch/x86/mm/shadow/common.c 2016-10-06 18:18:12.835984594 +0200 -@@ -125,10 +125,19 @@ __initcall(shadow_audit_key_init); - /* x86 emulator support for the shadow code - */ - -+/* -+ * Callers which pass a known in-range x86_segment can rely on the return -+ * pointer being valid. Other callers must explicitly check for errors. -+ */ - struct segment_register *hvm_get_seg_reg( - enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt) - { -- struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg]; -+ struct segment_register *seg_reg; -+ -+ if ( seg < 0 || seg >= ARRAY_SIZE(sh_ctxt->seg_reg) ) -+ return ERR_PTR(-X86EMUL_UNHANDLEABLE); -+ -+ seg_reg = &sh_ctxt->seg_reg[seg]; - if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) ) - hvm_get_segment_register(current, seg, seg_reg); - return seg_reg; -@@ -145,14 +154,9 @@ static int hvm_translate_linear_addr( - struct segment_register *reg; - int okay; - -- /* -- * Can arrive here with non-user segments. However, no such cirucmstance -- * is part of a legitimate pagetable update, so fail the emulation. 
-- */ -- if ( !is_x86_user_segment(seg) ) -- return X86EMUL_UNHANDLEABLE; -- - reg = hvm_get_seg_reg(seg, sh_ctxt); -+ if ( IS_ERR(reg) ) -+ return -PTR_ERR(reg); - - okay = hvm_virtual_to_linear_addr( - seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr); -@@ -254,9 +258,6 @@ hvm_emulate_write(enum x86_segment seg, - unsigned long addr; - int rc; - -- if ( !is_x86_user_segment(seg) ) -- return X86EMUL_UNHANDLEABLE; -- - /* How many emulations could we save if we unshadowed on stack writes? */ - if ( seg == x86_seg_ss ) - perfc_incr(shadow_fault_emulate_stack); -@@ -284,9 +285,6 @@ hvm_emulate_cmpxchg(enum x86_segment seg - unsigned long addr, old[2], new[2]; - int rc; - -- if ( !is_x86_user_segment(seg) ) -- return X86EMUL_UNHANDLEABLE; -- - rc = hvm_translate_linear_addr( - seg, offset, bytes, hvm_access_write, sh_ctxt, &addr); - if ( rc ) -Index: xen-4.6.0/xen/include/asm-x86/hvm/emulate.h -=================================================================== ---- xen-4.6.0.orig/xen/include/asm-x86/hvm/emulate.h 2016-10-06 18:18:12.839984594 +0200 -+++ xen-4.6.0/xen/include/asm-x86/hvm/emulate.h 2016-10-06 18:18:12.835984594 +0200 -@@ -13,6 +13,7 @@ - #define __ASM_X86_HVM_EMULATE_H__ - - #include -+#include - #include - #include - diff -Nru xen-4.6.0/debian/patches/xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch xen-4.6.5/debian/patches/xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch --- xen-4.6.0/debian/patches/xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch 2016-10-06 16:17:45.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -From: Andrew Cooper -Subject: x86/shadow: Avoid overflowing sh_ctxt->seg_reg[] - -hvm_get_seg_reg() does not perform a range check on its input segment, calls -hvm_get_segment_register() and writes straight into sh_ctxt->seg_reg[]. - -x86_seg_none is outside the bounds of sh_ctxt->seg_reg[], and will hit a BUG() -in {vmx,svm}_get_segment_register(). - -HVM guests running with shadow paging can end up performing a virtual to -linear translation with x86_seg_none. This is used for addresses which are -already linear. However, none of this is a legitimate pagetable update, so -fail the emulation in such a case. - -This is XSA-187 - -Reported-by: Andrew Cooper -Signed-off-by: Andrew Cooper -Reviewed-by: Tim Deegan - ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -140,9 +140,18 @@ static int hvm_translate_linear_addr( - struct sh_emulate_ctxt *sh_ctxt, - unsigned long *paddr) - { -- struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt); -+ struct segment_register *reg; - int okay; - -+ /* -+ * Can arrive here with non-user segments. However, no such cirucmstance -+ * is part of a legitimate pagetable update, so fail the emulation. 
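The check introduced here, is_x86_user_segment(), is a whitelist on the segment enumerator applied before any register lookup. Its shape in isolation (the enumerator values are illustrative; the real x86_segment enum lives in the emulator headers):

    #include <stdio.h>

    enum x86_segment { x86_seg_cs, x86_seg_ss, x86_seg_ds, x86_seg_es,
                       x86_seg_fs, x86_seg_gs, x86_seg_tr, x86_seg_none };

    static int is_x86_user_segment(enum x86_segment seg)
    {
        return seg <= x86_seg_gs;    /* cs..gs only */
    }

    static int translate_linear(enum x86_segment seg)
    {
        if (!is_x86_user_segment(seg))
            return -1;               /* fail the emulation */
        return 0;                    /* ...translation would go here */
    }

    int main(void)
    {
        printf("cs:   %d\n", translate_linear(x86_seg_cs));    /* 0 */
        printf("none: %d\n", translate_linear(x86_seg_none));  /* -1 */
        return 0;
    }

The companion patch above then subsumes this whitelist with an explicit bounds test against the size of the seg_reg[] cache.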
-+ */ -+ if ( !is_x86_user_segment(seg) ) -+ return X86EMUL_UNHANDLEABLE; -+ -+ reg = hvm_get_seg_reg(seg, sh_ctxt); -+ - okay = hvm_virtual_to_linear_addr( - seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr); - diff -Nru xen-4.6.0/debian/patches/xsa190-4.6.patch xen-4.6.5/debian/patches/xsa190-4.6.patch --- xen-4.6.0/debian/patches/xsa190-4.6.patch 2016-10-06 13:55:06.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa190-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,163 +0,0 @@ -x86emul: honor guest CR0.TS and CR0.EM - -We must not emulate any instructions accessing respective registers -when either of these flags is set in the guest view of the register, or -else we may do so on data not belonging to the guest's current task. - -Being architecturally required behavior, the logic gets placed in the -instruction emulator instead of hvmemul_get_fpu(). It should be noted, -though, that hvmemul_get_fpu() being the only current handler for the -get_fpu() callback, we don't have an active problem with CR4: Both -CR4.OSFXSR and CR4.OSXSAVE get handled as necessary by that function. - -This is XSA-190. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/tools/tests/x86_emulator/test_x86_emulator.c -+++ b/tools/tests/x86_emulator/test_x86_emulator.c -@@ -129,6 +129,22 @@ static inline uint64_t xgetbv(uint32_t x - (ebx & (1U << 5)) != 0; \ - }) - -+static int read_cr( -+ unsigned int reg, -+ unsigned long *val, -+ struct x86_emulate_ctxt *ctxt) -+{ -+ /* Fake just enough state for the emulator's _get_fpu() to be happy. */ -+ switch ( reg ) -+ { -+ case 0: -+ *val = 0x00000001; /* PE */ -+ return X86EMUL_OKAY; -+ } -+ -+ return X86EMUL_UNHANDLEABLE; -+} -+ - int get_fpu( - void (*exception_callback)(void *, struct cpu_user_regs *), - void *exception_callback_arg, -@@ -160,6 +176,7 @@ static struct x86_emulate_ops emulops = - .write = write, - .cmpxchg = cmpxchg, - .cpuid = cpuid, -+ .read_cr = read_cr, - .get_fpu = get_fpu, - }; - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -1557,6 +1557,7 @@ static int hvmemul_get_fpu( - switch ( type ) - { - case X86EMUL_FPU_fpu: -+ case X86EMUL_FPU_wait: - break; - case X86EMUL_FPU_mmx: - if ( !cpu_has_mmx ) -@@ -1564,7 +1565,6 @@ static int hvmemul_get_fpu( - break; - case X86EMUL_FPU_xmm: - if ( !cpu_has_xmm || -- (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) || - !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) ) - return X86EMUL_UNHANDLEABLE; - break; ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -366,6 +366,9 @@ typedef union { - - /* Control register flags. */ - #define CR0_PE (1<<0) -+#define CR0_MP (1<<1) -+#define CR0_EM (1<<2) -+#define CR0_TS (1<<3) - #define CR4_TSD (1<<2) - - /* EFLAGS bit definitions. 
*/ -@@ -393,6 +396,7 @@ typedef union { - #define EXC_OF 4 - #define EXC_BR 5 - #define EXC_UD 6 -+#define EXC_NM 7 - #define EXC_TS 10 - #define EXC_NP 11 - #define EXC_SS 12 -@@ -674,10 +678,45 @@ static void fpu_handle_exception(void *_ - regs->eip += fic->insn_bytes; - } - -+static int _get_fpu( -+ enum x86_emulate_fpu_type type, -+ struct fpu_insn_ctxt *fic, -+ struct x86_emulate_ctxt *ctxt, -+ const struct x86_emulate_ops *ops) -+{ -+ int rc; -+ -+ fic->exn_raised = 0; -+ -+ fail_if(!ops->get_fpu); -+ rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt); -+ -+ if ( rc == X86EMUL_OKAY ) -+ { -+ unsigned long cr0; -+ -+ fail_if(!ops->read_cr); -+ rc = ops->read_cr(0, &cr0, ctxt); -+ if ( rc != X86EMUL_OKAY ) -+ return rc; -+ if ( cr0 & CR0_EM ) -+ { -+ generate_exception_if(type == X86EMUL_FPU_fpu, EXC_NM, -1); -+ generate_exception_if(type == X86EMUL_FPU_mmx, EXC_UD, -1); -+ generate_exception_if(type == X86EMUL_FPU_xmm, EXC_UD, -1); -+ } -+ generate_exception_if((cr0 & CR0_TS) && -+ (type != X86EMUL_FPU_wait || (cr0 & CR0_MP)), -+ EXC_NM, -1); -+ } -+ -+ done: -+ return rc; -+} -+ - #define get_fpu(_type, _fic) \ --do{ (_fic)->exn_raised = 0; \ -- fail_if(ops->get_fpu == NULL); \ -- rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \ -+do { \ -+ rc = _get_fpu(_type, _fic, ctxt, ops); \ - if ( rc ) goto done; \ - } while (0) - #define _put_fpu() \ -@@ -2508,8 +2547,14 @@ x86_emulate( - } - - case 0x9b: /* wait/fwait */ -- emulate_fpu_insn("fwait"); -+ { -+ struct fpu_insn_ctxt fic = { .insn_bytes = 1 }; -+ -+ get_fpu(X86EMUL_FPU_wait, &fic); -+ asm volatile ( "fwait" ::: "memory" ); -+ put_fpu(&fic); - break; -+ } - - case 0x9c: /* pushf */ - src.val = _regs.eflags; ---- a/xen/arch/x86/x86_emulate/x86_emulate.h -+++ b/xen/arch/x86/x86_emulate/x86_emulate.h -@@ -115,6 +115,7 @@ struct __packed segment_register { - /* FPU sub-types which may be requested via ->get_fpu(). */ - enum x86_emulate_fpu_type { - X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */ -+ X86EMUL_FPU_wait, /* WAIT/FWAIT instruction */ - X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */ - X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */ - X86EMUL_FPU_ymm /* AVX/XOP instruction set (%ymm0-%ymm7/15) */ diff -Nru xen-4.6.0/debian/patches/xsa191-4.6.patch xen-4.6.5/debian/patches/xsa191-4.6.patch --- xen-4.6.0/debian/patches/xsa191-4.6.patch 2017-01-10 13:33:16.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa191-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -From: Andrew Cooper -Subject: x86/hvm: Fix the handling of non-present segments - -In 32bit, the data segments may be NULL to indicate that the segment is -ineligible for use. In both 32bit and 64bit, the LDT selector may be NULL to -indicate that the entire LDT is ineligible for use. However, nothing in Xen -actually checks for this condition when performing other segmentation -checks. (Note however that limit and writeability checks are correctly -performed). - -Neither Intel nor AMD specify the exact behaviour of loading a NULL segment. -Experimentally, AMD zeroes all attributes but leaves the base and limit -unmodified. Intel zeroes the base, sets the limit to 0xfffffff and resets the -attributes to just .G and .D/B. - -The use of the segment information in the VMCB/VMCS is equivalent to a native -pipeline interacting with the segment cache. The present bit can therefore -have a subtly different meaning, and it is now cooked to uniformly indicate -whether the segment is usable or not. 
- -GDTR and IDTR don't have access rights like the other segments, but for -consistency, they are treated as being present so no special casing is needed -elsewhere in the segmentation logic. - -AMD hardware does not consider the present bit for %cs and %tr, and will -function as if they were present. They are therefore unconditionally set to -present when reading information from the VMCB, to maintain the new meaning of -usability. - -Intel hardware has a separate unusable bit in the VMCS segment attributes. -This bit is inverted and stored in the present field, so the hvm code can work -with architecturally-common state. - -This is XSA-191. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -3666,6 +3666,10 @@ int hvm_virtual_to_linear_addr( - * COMPATIBILITY MODE: Apply segment checks and add base. - */ - -+ /* Segment not valid for use (cooked meaning of .p)? */ -+ if ( !reg->attr.fields.p ) -+ return 0; -+ - switch ( access_type ) - { - case hvm_access_read: -@@ -3871,6 +3875,10 @@ static int hvm_load_segment_selector( - hvm_get_segment_register( - v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab); - -+ /* Segment not valid for use (cooked meaning of .p)? */ -+ if ( !desctab.attr.fields.p ) -+ goto fail; -+ - /* Check against descriptor table limit. */ - if ( ((sel & 0xfff8) + 7) > desctab.limit ) - goto fail; ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -620,6 +620,7 @@ static void svm_get_segment_register(str - { - case x86_seg_cs: - memcpy(reg, &vmcb->cs, sizeof(*reg)); -+ reg->attr.fields.p = 1; - reg->attr.fields.g = reg->limit > 0xFFFFF; - break; - case x86_seg_ds: -@@ -653,13 +654,16 @@ static void svm_get_segment_register(str - case x86_seg_tr: - svm_sync_vmcb(v); - memcpy(reg, &vmcb->tr, sizeof(*reg)); -+ reg->attr.fields.p = 1; - reg->attr.fields.type |= 0x2; - break; - case x86_seg_gdtr: - memcpy(reg, &vmcb->gdtr, sizeof(*reg)); -+ reg->attr.bytes = 0x80; - break; - case x86_seg_idtr: - memcpy(reg, &vmcb->idtr, sizeof(*reg)); -+ reg->attr.bytes = 0x80; - break; - case x86_seg_ldtr: - svm_sync_vmcb(v); ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -867,10 +867,12 @@ void vmx_get_segment_register(struct vcp - reg->sel = sel; - reg->limit = limit; - -- reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00); -- /* Unusable flag is folded into Present flag. */ -- if ( attr & (1u<<16) ) -- reg->attr.fields.p = 0; -+ /* -+ * Fold VT-x representation into Xen's representation. The Present bit is -+ * unconditionally set to the inverse of unusable. -+ */ -+ reg->attr.bytes = -+ (!(attr & (1u << 16)) << 7) | (attr & 0x7f) | ((attr >> 4) & 0xf00); - - /* Adjust for virtual 8086 mode */ - if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr -@@ -950,11 +952,11 @@ static void vmx_set_segment_register(str - } - } - -- attr = ((attr & 0xf00) << 4) | (attr & 0xff); -- -- /* Not-present must mean unusable. */ -- if ( !reg->attr.fields.p ) -- attr |= (1u << 16); -+ /* -+ * Unfold Xen representation into VT-x representation. The unusable bit -+ * is unconditionally set to the inverse of present. -+ */ -+ attr = (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff); - - /* VMX has strict consistency requirement for flag G. 
*/ - attr |= !!(limit >> 20) << 15; ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1209,6 +1209,10 @@ protmode_load_seg( - &desctab, ctxt)) ) - return rc; - -+ /* Segment not valid for use (cooked meaning of .p)? */ -+ if ( !desctab.attr.fields.p ) -+ goto raise_exn; -+ - /* Check against descriptor table limit. */ - if ( ((sel & 0xfff8) + 7) > desctab.limit ) - goto raise_exn; diff -Nru xen-4.6.0/debian/patches/xsa192.patch xen-4.6.5/debian/patches/xsa192.patch --- xen-4.6.0/debian/patches/xsa192.patch 2017-01-10 13:35:23.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa192.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -From: Jan Beulich -Subject: x86/HVM: don't load LDTR with VM86 mode attrs during task switch - -Just like TR, LDTR is purely a protected mode facility and hence needs -to be loaded accordingly. Also move its loading to where it -architecurally belongs. - -This is XSA-192. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Tested-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -2728,17 +2728,16 @@ static void hvm_unmap_entry(void *p) - } - - static int hvm_load_segment_selector( -- enum x86_segment seg, uint16_t sel) -+ enum x86_segment seg, uint16_t sel, unsigned int eflags) - { - struct segment_register desctab, cs, segr; - struct desc_struct *pdesc, desc; - u8 dpl, rpl, cpl; - bool_t writable; - int fault_type = TRAP_invalid_tss; -- struct cpu_user_regs *regs = guest_cpu_user_regs(); - struct vcpu *v = current; - -- if ( regs->eflags & X86_EFLAGS_VM ) -+ if ( eflags & X86_EFLAGS_VM ) - { - segr.sel = sel; - segr.base = (uint32_t)sel << 4; -@@ -2986,6 +2985,8 @@ void hvm_task_switch( - if ( rc != HVMCOPY_okay ) - goto out; - -+ if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt, 0) ) -+ goto out; - - if ( hvm_set_cr3(tss.cr3, 1) ) - goto out; -@@ -3008,13 +3009,12 @@ void hvm_task_switch( - } - - exn_raised = 0; -- if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt) || -- hvm_load_segment_selector(x86_seg_es, tss.es) || -- hvm_load_segment_selector(x86_seg_cs, tss.cs) || -- hvm_load_segment_selector(x86_seg_ss, tss.ss) || -- hvm_load_segment_selector(x86_seg_ds, tss.ds) || -- hvm_load_segment_selector(x86_seg_fs, tss.fs) || -- hvm_load_segment_selector(x86_seg_gs, tss.gs) ) -+ if ( hvm_load_segment_selector(x86_seg_es, tss.es, tss.eflags) || -+ hvm_load_segment_selector(x86_seg_cs, tss.cs, tss.eflags) || -+ hvm_load_segment_selector(x86_seg_ss, tss.ss, tss.eflags) || -+ hvm_load_segment_selector(x86_seg_ds, tss.ds, tss.eflags) || -+ hvm_load_segment_selector(x86_seg_fs, tss.fs, tss.eflags) || -+ hvm_load_segment_selector(x86_seg_gs, tss.gs, tss.eflags) ) - exn_raised = 1; - - rc = hvm_copy_to_guest_virt( diff -Nru xen-4.6.0/debian/patches/xsa193-4.7.patch xen-4.6.5/debian/patches/xsa193-4.7.patch --- xen-4.6.0/debian/patches/xsa193-4.7.patch 2017-01-10 13:35:56.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa193-4.7.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ -From: Jan Beulich -Subject: x86/PV: writes of %fs and %gs base MSRs require canonical addresses - -Commit c42494acb2 ("x86: fix FS/GS base handling when using the -fsgsbase feature") replaced the use of wrmsr_safe() on these paths -without recognizing that wr{f,g}sbase() use just wrmsrl() and that the -WR{F,G}SBASE instructions also raise #GP for non-canonical input. 
- -Similarly arch_set_info_guest() needs to prevent non-canonical -addresses from getting stored into state later to be loaded by context -switch code. For consistency also check stack pointers and LDT base. -DR0..3, otoh, already get properly checked in set_debugreg() (albeit -we discard the error there). - -The SHADOW_GS_BASE check isn't strictly necessary, but I think we -better avoid trying the WRMSR if we know it's going to fail. - -This is XSA-193. - -Reported-by: Andrew Cooper -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -890,7 +890,13 @@ int arch_set_info_guest( - { - if ( !compat ) - { -- if ( !is_canonical_address(c.nat->user_regs.eip) || -+ if ( !is_canonical_address(c.nat->user_regs.rip) || -+ !is_canonical_address(c.nat->user_regs.rsp) || -+ !is_canonical_address(c.nat->kernel_sp) || -+ (c.nat->ldt_ents && !is_canonical_address(c.nat->ldt_base)) || -+ !is_canonical_address(c.nat->fs_base) || -+ !is_canonical_address(c.nat->gs_base_kernel) || -+ !is_canonical_address(c.nat->gs_base_user) || - !is_canonical_address(c.nat->event_callback_eip) || - !is_canonical_address(c.nat->syscall_callback_eip) || - !is_canonical_address(c.nat->failsafe_callback_eip) ) ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2723,19 +2723,22 @@ static int emulate_privileged_op(struct - switch ( regs->_ecx ) - { - case MSR_FS_BASE: -- if ( is_pv_32bit_domain(currd) ) -+ if ( is_pv_32bit_domain(currd) || -+ !is_canonical_address(msr_content) ) - goto fail; - wrfsbase(msr_content); - v->arch.pv_vcpu.fs_base = msr_content; - break; - case MSR_GS_BASE: -- if ( is_pv_32bit_domain(currd) ) -+ if ( is_pv_32bit_domain(currd) || -+ !is_canonical_address(msr_content) ) - goto fail; - wrgsbase(msr_content); - v->arch.pv_vcpu.gs_base_kernel = msr_content; - break; - case MSR_SHADOW_GS_BASE: -- if ( is_pv_32bit_domain(currd) ) -+ if ( is_pv_32bit_domain(currd) || -+ !is_canonical_address(msr_content) ) - goto fail; - if ( wrmsr_safe(MSR_SHADOW_GS_BASE, msr_content) ) - goto fail; diff -Nru xen-4.6.0/debian/patches/xsa195.patch xen-4.6.5/debian/patches/xsa195.patch --- xen-4.6.0/debian/patches/xsa195.patch 2017-01-10 13:36:22.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa195.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -From: Jan Beulich -Subject: x86emul: fix huge bit offset handling - -We must never chop off the high 32 bits. - -This is XSA-195. - -Reported-by: George Dunlap -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -2549,6 +2549,12 @@ x86_emulate( - else - { - /* -+ * Instructions such as bt can reference an arbitrary offset from -+ * their memory operand, but the instruction doing the actual -+ * emulation needs the appropriate op_bytes read from memory. -+ * Adjust both the source register and memory operand to make an -+ * equivalent instruction. -+ * - * EA += BitOffset DIV op_bytes*8 - * BitOffset = BitOffset MOD op_bytes*8 - * DIV truncates towards negative infinity. 
-@@ -2560,14 +2566,15 @@ x86_emulate( - src.val = (int32_t)src.val; - if ( (long)src.val < 0 ) - { -- unsigned long byte_offset; -- byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1)); -+ unsigned long byte_offset = -+ op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L)); -+ - ea.mem.off -= byte_offset; - src.val = (byte_offset << 3) + src.val; - } - else - { -- ea.mem.off += (src.val >> 3) & ~(op_bytes - 1); -+ ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L); - src.val &= (op_bytes << 3) - 1; - } - } diff -Nru xen-4.6.0/debian/patches/xsa196-0001-x86-emul-Correct-the-IDT-entry-calculation-in-inject.patch xen-4.6.5/debian/patches/xsa196-0001-x86-emul-Correct-the-IDT-entry-calculation-in-inject.patch --- xen-4.6.0/debian/patches/xsa196-0001-x86-emul-Correct-the-IDT-entry-calculation-in-inject.patch 2017-01-10 13:36:36.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa196-0001-x86-emul-Correct-the-IDT-entry-calculation-in-inject.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -From: Andrew Cooper -Subject: x86/emul: Correct the IDT entry calculation in inject_swint() - -The logic, as introduced in c/s 36ebf14ebe "x86/emulate: support for emulating -software event injection" is buggy. The size of an IDT entry depends on long -mode being active, not the width of the code segment currently in use. - -In particular, this means that a compatibility code segment which hits -emulation for software event injection will end up using an incorrect offset -in the IDT for DPL/Presence checking. In practice, this only occurs on old -AMD hardware lacking NRip support; all newer AMD hardware, and all Intel -hardware bypass this path in the emulator. - -While here, fix a minor issue with reading the IDT entry. The return value -from ops->read() wasn't checked, but in reality the only failure case is if a -pagefault occurs. This is not a realistic problem as the kernel will almost -certainly crash with a double fault if this setup actually occurred. - -This is part of XSA-196. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/x86_emulate/x86_emulate.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 7a707dc..f74aa8f 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1630,10 +1630,16 @@ static int inject_swint(enum x86_swint_type type, - { - if ( !in_realmode(ctxt, ops) ) - { -- unsigned int idte_size = (ctxt->addr_size == 64) ? 16 : 8; -- unsigned int idte_offset = vector * idte_size; -+ unsigned int idte_size, idte_offset; - struct segment_register idtr; - uint32_t idte_ctl; -+ int lm = in_longmode(ctxt, ops); -+ -+ if ( lm < 0 ) -+ return X86EMUL_UNHANDLEABLE; -+ -+ idte_size = lm ? 16 : 8; -+ idte_offset = vector * idte_size; - - /* icebp sets the External Event bit despite being an instruction. */ - error_code = (vector << 3) | ECODE_IDT | -@@ -1661,8 +1667,9 @@ static int inject_swint(enum x86_swint_type type, - * Should strictly speaking read all 8/16 bytes of an entry, - * but we currently only care about the dpl and present bits. - */ -- ops->read(x86_seg_none, idtr.base + idte_offset + 4, -- &idte_ctl, sizeof(idte_ctl), ctxt); -+ if ( (rc = ops->read(x86_seg_none, idtr.base + idte_offset + 4, -+ &idte_ctl, sizeof(idte_ctl), ctxt)) ) -+ goto done; - - /* Is this entry present?
*/ - if ( !(idte_ctl & (1u << 15)) ) diff -Nru xen-4.6.0/debian/patches/xsa196-0002-x86-svm-Fix-injection-of-software-interrupts.patch xen-4.6.5/debian/patches/xsa196-0002-x86-svm-Fix-injection-of-software-interrupts.patch --- xen-4.6.0/debian/patches/xsa196-0002-x86-svm-Fix-injection-of-software-interrupts.patch 2017-01-10 13:36:44.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa196-0002-x86-svm-Fix-injection-of-software-interrupts.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -From: Andrew Cooper -Subject: x86/svm: Fix injection of software interrupts - -The non-NextRip logic in c/s 36ebf14eb "x86/emulate: support for emulating -software event injection" was based on an older version of the AMD software -manual. The manual was later corrected, following findings from that series. - -I took the original wording of "not supported without NextRIP" to mean that -X86_EVENTTYPE_SW_INTERRUPT was not eligible for use. It turns out that this -is not the case, and the new wording is clearer on the matter. - -Despite testing the original patch series on non-NRip hardware, the -swint-emulation XTF test case focuses on the debug vectors; it never ended up -executing an `int $n` instruction for a vector which wasn't also an exception. - -During a vmentry, the use of X86_EVENTTYPE_HW_EXCEPTION comes with a vector -check to ensure that it is only used with exception vectors. Xen's use of -X86_EVENTTYPE_HW_EXCEPTION for `int $n` injection has always been buggy on AMD -hardware. - -Fix this by always using X86_EVENTTYPE_SW_INTERRUPT. - -Print and decode the eventinj information in svm_vmcb_dump(), as it has -several invalid combinations which cause vmentry failures. - -This is part of XSA-196. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/hvm/svm/svm.c | 13 +++++-------- - xen/arch/x86/hvm/svm/svmdebug.c | 4 ++++ - 2 files changed, 9 insertions(+), 8 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index 4391744..76efc3e 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1231,17 +1231,14 @@ static void svm_inject_trap(const struct hvm_trap *trap) - { - case X86_EVENTTYPE_SW_INTERRUPT: /* int $n */ - /* -- * Injection type 4 (software interrupt) is only supported with -- * NextRIP support. Without NextRIP, the emulator will have performed -- * DPL and presence checks for us. -+ * Software interrupts (type 4) cannot be properly injected if the -+ * processor doesn't support NextRIP. Without NextRIP, the emulator -+ * will have performed DPL and presence checks for us, and will have -+ * moved eip forward if appropriate. - */ - if ( cpu_has_svm_nrips ) -- { - vmcb->nextrip = regs->eip + _trap.insn_len; -- event.fields.type = X86_EVENTTYPE_SW_INTERRUPT; -- } -- else -- event.fields.type = X86_EVENTTYPE_HW_EXCEPTION; -+ event.fields.type = X86_EVENTTYPE_SW_INTERRUPT; - break; - - case X86_EVENTTYPE_PRI_SW_EXCEPTION: /* icebp */ -diff --git a/xen/arch/x86/hvm/svm/svmdebug.c b/xen/arch/x86/hvm/svm/svmdebug.c -index ded5d19..f93dfed 100644 ---- a/xen/arch/x86/hvm/svm/svmdebug.c -+++ b/xen/arch/x86/hvm/svm/svmdebug.c -@@ -48,6 +48,10 @@ void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb) - vmcb->tlb_control, - (unsigned long long)vmcb->_vintr.bytes, - (unsigned long long)vmcb->interrupt_shadow); -+ printk("eventinj %016"PRIx64", valid? %d, ec? 
%d, type %u, vector %#x\n", -+ vmcb->eventinj.bytes, vmcb->eventinj.fields.v, -+ vmcb->eventinj.fields.ev, vmcb->eventinj.fields.type, -+ vmcb->eventinj.fields.vector); - printk("exitcode = %#Lx exitintinfo = %#Lx\n", - (unsigned long long)vmcb->exitcode, - (unsigned long long)vmcb->exitintinfo.bytes); diff -Nru xen-4.6.0/debian/patches/xsa198.patch xen-4.6.5/debian/patches/xsa198.patch --- xen-4.6.0/debian/patches/xsa198.patch 2017-01-10 13:37:33.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa198.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -From 71a389ae940bc52bf897a6e5becd73fd8ede94c5 Mon Sep 17 00:00:00 2001 -From: Ian Jackson -Date: Thu, 3 Nov 2016 16:37:40 +0000 -Subject: [PATCH] pygrub: Properly quote results, when returning them to the - caller: - -* When the caller wants sexpr output, use `repr()' - This is what Xend expects. - - The returned S-expressions are now escaped and quoted by Python, - generally using '...'. Previously kernel and ramdisk were unquoted - and args was quoted with "..." but without proper escaping. This - change may break toolstacks which do not properly dequote the - returned S-expressions. - -* When the caller wants "simple" output, crash if the delimiter is - contained in the returned value. - - With --output-format=simple it does not seem like this could ever - happen, because the bootloader config parsers all take line-based - input from the various bootloader config files. - - With --output-format=simple0, this can happen if the bootloader - config file contains nul bytes. - -This is XSA-198. - -Signed-off-by: Ian Jackson -Tested-by: Ian Jackson -Reviewed-by: Andrew Cooper ---- - tools/pygrub/src/pygrub | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub -index 40f9584..dd0c8f7 100755 ---- a/tools/pygrub/src/pygrub -+++ b/tools/pygrub/src/pygrub -@@ -721,14 +721,17 @@ def sniff_netware(fs, cfg): - return cfg - - def format_sxp(kernel, ramdisk, args): -- s = "linux (kernel %s)" % kernel -+ s = "linux (kernel %s)" % repr(kernel) - if ramdisk: -- s += "(ramdisk %s)" % ramdisk -+ s += "(ramdisk %s)" % repr(ramdisk) - if args: -- s += "(args \"%s\")" % args -+ s += "(args %s)" % repr(args) - return s - - def format_simple(kernel, ramdisk, args, sep): -+ for check in (kernel, ramdisk, args): -+ if check is not None and sep in check: -+ raise RuntimeError, "simple format cannot represent delimiter-containing value" - s = ("kernel %s" % kernel) + sep - if ramdisk: - s += ("ramdisk %s" % ramdisk) + sep --- -2.1.4 - diff -Nru xen-4.6.0/debian/patches/xsa200-4.6.patch xen-4.6.5/debian/patches/xsa200-4.6.patch --- xen-4.6.0/debian/patches/xsa200-4.6.patch 2017-01-10 13:37:58.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa200-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -From: Jan Beulich -Subject: x86emul: CMPXCHG8B ignores operand size prefix - -Otherwise besides mis-handling the instruction, the comparison failure -case would result in uninitialized stack data being handed back to the -guest in rDX:rAX (32 bits leaked for 32-bit guests, 96 bits for 64-bit -ones). - -This is XSA-200. 
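As background to the XSA-200 hunks below, the architectural behaviour the emulator must reproduce: CMPXCHG8B always operates on 8 bytes (16 with REX.W, as CMPXCHG16B), the 0x66 operand-size prefix notwithstanding, and on comparison failure it loads the memory operand into EDX:EAX. A self-contained C model of the 8-byte form, names mine rather than Xen's:

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns the resulting ZF.  On mismatch the *memory* value is published
     * in EDX:EAX -- emulating at the wrong width is precisely how stale
     * stack data could be handed back to the guest. */
    static bool cmpxchg8b(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                          uint32_t ebx, uint32_t ecx)
    {
        uint64_t expected = ((uint64_t)*edx << 32) | *eax;

        if (*mem == expected) {
            *mem = ((uint64_t)ecx << 32) | ebx;   /* ZF=1: store ECX:EBX */
            return true;
        }
        *eax = (uint32_t)*mem;                    /* ZF=0: EDX:EAX := m64 */
        *edx = (uint32_t)(*mem >> 32);
        return false;
    }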
- -Signed-off-by: Jan Beulich - ---- a/tools/tests/x86_emulator/test_x86_emulator.c -+++ b/tools/tests/x86_emulator/test_x86_emulator.c -@@ -429,6 +429,24 @@ int main(int argc, char **argv) - goto fail; - printf("okay\n"); - -+ printf("%-40s", "Testing cmpxchg8b (%edi) [opsize]..."); -+ instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc7; instr[3] = 0x0f; -+ res[0] = 0x12345678; -+ res[1] = 0x87654321; -+ regs.eflags = 0x200; -+ regs.eip = (unsigned long)&instr[0]; -+ regs.edi = (unsigned long)res; -+ rc = x86_emulate(&ctxt, &emulops); -+ if ( (rc != X86EMUL_OKAY) || -+ (res[0] != 0x12345678) || -+ (res[1] != 0x87654321) || -+ (regs.eax != 0x12345678) || -+ (regs.edx != 0x87654321) || -+ ((regs.eflags&0x240) != 0x200) || -+ (regs.eip != (unsigned long)&instr[4]) ) -+ goto fail; -+ printf("okay\n"); -+ - printf("%-40s", "Testing movsxbd (%%eax),%%ecx..."); - instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; - regs.eflags = 0x200; ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -4739,8 +4739,12 @@ x86_emulate( - generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - if ( op_bytes == 8 ) -+ { - vcpu_must_have_cx16(); -- op_bytes *= 2; -+ op_bytes = 16; -+ } -+ else -+ op_bytes = 8; - - /* Get actual old value. */ - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes, diff -Nru xen-4.6.0/debian/patches/xsa201-1.patch xen-4.6.5/debian/patches/xsa201-1.patch --- xen-4.6.0/debian/patches/xsa201-1.patch 2017-01-10 13:38:18.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa201-1.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,87 +0,0 @@ -From: Wei Chen -Subject: arm64: handle guest-generated EL1 asynchronous abort - -In the current code, when the hypervisor receives an asynchronous abort -from a guest, the hypervisor panics and the host goes down. We have to -prevent such a security issue, so in this patch we crash the guest when -the hypervisor receives an asynchronous abort from it. - -This is CVE-2016-9815, part of XSA-201.
- -Signed-off-by: Wei Chen -Reviewed-by: Stefano Stabellini -Reviewed-by: Steve Capper -Reviewed-by: Julien Grall - ---- a/xen/arch/arm/arm64/entry.S -+++ b/xen/arch/arm/arm64/entry.S -@@ -204,9 +204,12 @@ guest_fiq_invalid: - entry hyp=0, compat=0 - invalid BAD_FIQ - --guest_error_invalid: -+guest_error: - entry hyp=0, compat=0 -- invalid BAD_ERROR -+ msr daifclr, #2 -+ mov x0, sp -+ bl do_trap_guest_error -+ exit hyp=0, compat=0 - - guest_sync_compat: - entry hyp=0, compat=1 -@@ -225,9 +228,12 @@ guest_fiq_invalid_compat: - entry hyp=0, compat=1 - invalid BAD_FIQ - --guest_error_invalid_compat: -+guest_error_compat: - entry hyp=0, compat=1 -- invalid BAD_ERROR -+ msr daifclr, #2 -+ mov x0, sp -+ bl do_trap_guest_error -+ exit hyp=0, compat=1 - - ENTRY(return_to_new_vcpu32) - exit hyp=0, compat=1 -@@ -286,12 +292,12 @@ ENTRY(hyp_traps_vector) - ventry guest_sync // Synchronous 64-bit EL0/EL1 - ventry guest_irq // IRQ 64-bit EL0/EL1 - ventry guest_fiq_invalid // FIQ 64-bit EL0/EL1 -- ventry guest_error_invalid // Error 64-bit EL0/EL1 -+ ventry guest_error // Error 64-bit EL0/EL1 - - ventry guest_sync_compat // Synchronous 32-bit EL0/EL1 - ventry guest_irq_compat // IRQ 32-bit EL0/EL1 - ventry guest_fiq_invalid_compat // FIQ 32-bit EL0/EL1 -- ventry guest_error_invalid_compat // Error 32-bit EL0/EL1 -+ ventry guest_error_compat // Error 32-bit EL0/EL1 - - /* - * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next) ---- a/xen/arch/arm/traps.c -+++ b/xen/arch/arm/traps.c -@@ -2723,6 +2723,21 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs) - } - } - -+asmlinkage void do_trap_guest_error(struct cpu_user_regs *regs) -+{ -+ enter_hypervisor_head(regs); -+ -+ /* -+ * Currently, to ensure hypervisor safety, when we received a -+ * guest-generated vSerror/vAbort, we just crash the guest to protect -+ * the hypervisor. In future we can better handle this by injecting -+ * a vSerror/vAbort to the guest. -+ */ -+ gdprintk(XENLOG_WARNING, "Guest(Dom-%u) will be crashed by vSError\n", -+ current->domain->domain_id); -+ domain_crash_synchronous(); -+} -+ - asmlinkage void do_trap_irq(struct cpu_user_regs *regs) - { - enter_hypervisor_head(regs); diff -Nru xen-4.6.0/debian/patches/xsa201-2.patch xen-4.6.5/debian/patches/xsa201-2.patch --- xen-4.6.0/debian/patches/xsa201-2.patch 2017-01-10 13:38:25.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa201-2.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,199 +0,0 @@ -From: Wei Chen -Subject: arm64: handle async aborts delivered while at EL2 - -If EL1 generates an asynchronous abort and then traps into EL2 -(by HVC or IRQ) before the abort has been delivered, the hypervisor -cannot catch it, because the PSTATE.A bit is masked all the time -in the hypervisor. So this asynchronous abort may slip to the next -running guest with the PSTATE.A bit unmasked. - -In order to avoid this, it is necessary to take the abort at EL2, by -clearing the PSTATE.A bit. In this patch, we unmask the PSTATE.A bit -to open a window to catch guest-generated asynchronous aborts in all -EL1 -> EL2 switch paths. If we catch such an asynchronous abort in the -checking window, the hyp_error exception will be triggered and the -abort source guest will be crashed. - -This is CVE-2016-9816, part of XSA-201.
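The detection mechanism shared by these window patches can be summarised in a few lines of C. A hedged sketch: the two window labels are the real symbols this series adds, but the helper itself is hypothetical, modelled on the VABORT_GEN_BY_GUEST macro that the arm32 patch further down introduces:

    #include <stdbool.h>

    /* The single-instruction unmask window is bracketed by two labels.  An
     * SError delivered inside it leaves the saved exception PC on one of
     * them, which distinguishes "guest abort caught in the window" (crash
     * the guest) from a genuine hypervisor error (panic as before). */
    extern char abort_guest_exit_start[], abort_guest_exit_end[];

    static bool vabort_generated_by_guest(unsigned long pc)
    {
        return pc == (unsigned long)abort_guest_exit_start ||
               pc == (unsigned long)abort_guest_exit_end;
    }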
- -Signed-off-by: Wei Chen -Reviewed-by: Julien Grall - ---- a/xen/arch/arm/arm64/entry.S -+++ b/xen/arch/arm/arm64/entry.S -@@ -173,6 +173,43 @@ hyp_error_invalid: - entry hyp=1 - invalid BAD_ERROR - -+hyp_error: -+ /* -+ * Only two possibilities: -+ * 1) Either we come from the exit path, having just unmasked -+ * PSTATE.A: change the return code to an EL2 fault, and -+ * carry on, as we're already in a sane state to handle it. -+ * 2) Or we come from anywhere else, and that's a bug: we panic. -+ */ -+ entry hyp=1 -+ msr daifclr, #2 -+ -+ /* -+ * The ELR_EL2 may be modified by an interrupt, so we have to use the -+ * saved value in cpu_user_regs to check whether we come from 1) or -+ * not. -+ */ -+ ldr x0, [sp, #UREGS_PC] -+ adr x1, abort_guest_exit_start -+ cmp x0, x1 -+ adr x1, abort_guest_exit_end -+ ccmp x0, x1, #4, ne -+ mov x0, sp -+ mov x1, #BAD_ERROR -+ -+ /* -+ * Not equal, the exception come from 2). It's a bug, we have to -+ * panic the hypervisor. -+ */ -+ b.ne do_bad_mode -+ -+ /* -+ * Otherwise, the exception come from 1). It happened because of -+ * the guest. Crash this guest. -+ */ -+ bl do_trap_guest_error -+ exit hyp=1 -+ - /* Traps taken in Current EL with SP_ELx */ - hyp_sync: - entry hyp=1 -@@ -189,15 +226,29 @@ hyp_irq: - - guest_sync: - entry hyp=0, compat=0 -+ bl check_pending_vserror -+ /* -+ * If x0 is Non-zero, a vSError took place, the initial exception -+ * doesn't have any significance to be handled. Exit ASAP -+ */ -+ cbnz x0, 1f - msr daifclr, #2 - mov x0, sp - bl do_trap_hypervisor -+1: - exit hyp=0, compat=0 - - guest_irq: - entry hyp=0, compat=0 -+ bl check_pending_vserror -+ /* -+ * If x0 is Non-zero, a vSError took place, the initial exception -+ * doesn't have any significance to be handled. Exit ASAP -+ */ -+ cbnz x0, 1f - mov x0, sp - bl do_trap_irq -+1: - exit hyp=0, compat=0 - - guest_fiq_invalid: -@@ -213,15 +264,29 @@ guest_error: - - guest_sync_compat: - entry hyp=0, compat=1 -+ bl check_pending_vserror -+ /* -+ * If x0 is Non-zero, a vSError took place, the initial exception -+ * doesn't have any significance to be handled. Exit ASAP -+ */ -+ cbnz x0, 1f - msr daifclr, #2 - mov x0, sp - bl do_trap_hypervisor -+1: - exit hyp=0, compat=1 - - guest_irq_compat: - entry hyp=0, compat=1 -+ bl check_pending_vserror -+ /* -+ * If x0 is Non-zero, a vSError took place, the initial exception -+ * doesn't have any significance to be handled. Exit ASAP -+ */ -+ cbnz x0, 1f - mov x0, sp - bl do_trap_irq -+1: - exit hyp=0, compat=1 - - guest_fiq_invalid_compat: -@@ -270,6 +335,62 @@ return_from_trap: - eret - - /* -+ * This function is used to check pending virtual SError in the gap of -+ * EL1 -> EL2 world switch. -+ * The x0 register will be used to indicate the results of detection. -+ * x0 -- Non-zero indicates a pending virtual SError took place. -+ * x0 -- Zero indicates no pending virtual SError took place. -+ */ -+check_pending_vserror: -+ /* -+ * Save elr_el2 to check whether the pending SError exception takes -+ * place while we are doing this sync exception. -+ */ -+ mrs x0, elr_el2 -+ -+ /* Synchronize against in-flight ld/st */ -+ dsb sy -+ -+ /* -+ * Unmask PSTATE asynchronous abort bit. If there is a pending -+ * SError, the EL2 error exception will happen after PSTATE.A -+ * is cleared. -+ */ -+ msr daifclr, #4 -+ -+ /* -+ * This is our single instruction exception window. A pending -+ * SError is guaranteed to occur at the earliest when we unmask -+ * it, and at the latest just after the ISB. 
-+ * -+ * If a pending SError occurs, the program will jump to EL2 error -+ * exception handler, and the elr_el2 will be set to -+ * abort_guest_exit_start or abort_guest_exit_end. -+ */ -+abort_guest_exit_start: -+ -+ isb -+ -+abort_guest_exit_end: -+ /* Mask PSTATE asynchronous abort bit, close the checking window. */ -+ msr daifset, #4 -+ -+ /* -+ * Compare elr_el2 and the saved value to check whether we are -+ * returning from a valid exception caused by pending SError. -+ */ -+ mrs x1, elr_el2 -+ cmp x0, x1 -+ -+ /* -+ * Not equal, the pending SError exception took place, set -+ * x0 to non-zero. -+ */ -+ cset x0, ne -+ -+ ret -+ -+/* - * Exception vectors. - */ - .macro ventry label -@@ -287,7 +408,7 @@ ENTRY(hyp_traps_vector) - ventry hyp_sync // Synchronous EL2h - ventry hyp_irq // IRQ EL2h - ventry hyp_fiq_invalid // FIQ EL2h -- ventry hyp_error_invalid // Error EL2h -+ ventry hyp_error // Error EL2h - - ventry guest_sync // Synchronous 64-bit EL0/EL1 - ventry guest_irq // IRQ 64-bit EL0/EL1 diff -Nru xen-4.6.0/debian/patches/xsa201-3-4.7.patch xen-4.6.5/debian/patches/xsa201-3-4.7.patch --- xen-4.6.0/debian/patches/xsa201-3-4.7.patch 2017-01-10 13:38:32.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa201-3-4.7.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,47 +0,0 @@ -From: Wei Chen -Subject: arm: crash the guest when it traps on external abort - -If we spot a data or prefetch abort bearing the ESR_EL2.EA bit set, we -know that this is an external abort, and that should crash the guest. - -This is CVE-2016-9817, part of XSA-201. - -Signed-off-by: Wei Chen -Reviewed-by: Stefano Stabellini -Reviewed-by: Steve Capper -Reviewed-by: Julien Grall - ---- a/xen/arch/arm/traps.c -+++ b/xen/arch/arm/traps.c -@@ -2383,6 +2383,15 @@ static void do_trap_instr_abort_guest(struct cpu_user_regs *regs, - int rc; - register_t gva = READ_SYSREG(FAR_EL2); - -+ /* -+ * If this bit has been set, it means that this instruction abort is caused -+ * by a guest external abort. Currently we crash the guest to protect the -+ * hypervisor. In future one can better handle this by injecting a virtual -+ * abort to the guest. -+ */ -+ if ( hsr.iabt.eat ) -+ domain_crash_synchronous(); -+ - switch ( hsr.iabt.ifsc & 0x3f ) - { - case FSC_FLT_PERM ... FSC_FLT_PERM + 3: -@@ -2437,6 +2446,15 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs, - return; - } - -+ /* -+ * If this bit has been set, it means that this data abort is caused -+ * by a guest external abort. Currently we crash the guest to protect the -+ * hypervisor. In future one can better handle this by injecting a virtual -+ * abort to the guest. -+ */ -+ if ( dabt.eat ) -+ domain_crash_synchronous(); -+ - info.dabt = dabt; - #ifdef CONFIG_ARM_32 - info.gva = READ_CP32(HDFAR); diff -Nru xen-4.6.0/debian/patches/xsa201-4.patch xen-4.6.5/debian/patches/xsa201-4.patch --- xen-4.6.0/debian/patches/xsa201-4.patch 2017-01-10 13:38:39.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa201-4.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,130 +0,0 @@ -From: Wei Chen -Subject: arm32: handle async aborts delivered while at HYP - -If guest generates an asynchronous abort and then traps into HYP -(by HVC or IRQ) before the abort has been delivered, the hypervisor -could not catch it, because the PSTATE.A bit is masked all the time -in hypervisor. So this asynchronous abort may be slipped to next -running guest with PSTATE.A bit unmasked. - -In order to avoid this, it is necessary to take the abort at HYP, by -clearing the PSTATE.A bit. 
In this patch, we unmask the PSTATE.A bit -to open a window to catch guest-generated asynchronous abort in all -Guest -> HYP switch paths. If we caught such asynchronous abort in -checking window, the HYP data abort exception will be triggered and -the abort source guest will be crashed. - -This is CVE-2016-9818, part of XSA-201. - -Signed-off-by: Wei Chen -Reviewed-by: Julien Grall - ---- a/xen/arch/arm/arm32/entry.S -+++ b/xen/arch/arm/arm32/entry.S -@@ -42,6 +42,61 @@ save_guest_regs: - SAVE_BANKED(fiq) - SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) - SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); -+ /* -+ * Start to check pending virtual abort in the gap of Guest -> HYP -+ * world switch. -+ * -+ * Save ELR_hyp to check whether the pending virtual abort exception -+ * takes place while we are doing this trap exception. -+ */ -+ mrs r1, ELR_hyp -+ -+ /* -+ * Force loads and stores to complete before unmasking asynchronous -+ * aborts and forcing the delivery of the exception. -+ */ -+ dsb sy -+ -+ /* -+ * Unmask asynchronous abort bit. If there is a pending asynchronous -+ * abort, the data_abort exception will happen after A bit is cleared. -+ */ -+ cpsie a -+ -+ /* -+ * This is our single instruction exception window. A pending -+ * asynchronous abort is guaranteed to occur at the earliest when we -+ * unmask it, and at the latest just after the ISB. -+ * -+ * If a pending abort occurs, the program will jump to data_abort -+ * exception handler, and the ELR_hyp will be set to -+ * abort_guest_exit_start or abort_guest_exit_end. -+ */ -+ .global abort_guest_exit_start -+abort_guest_exit_start: -+ -+ isb -+ -+ .global abort_guest_exit_end -+abort_guest_exit_end: -+ /* Mask CPSR asynchronous abort bit, close the checking window. */ -+ cpsid a -+ -+ /* -+ * Compare ELR_hyp and the saved value to check whether we are -+ * returning from a valid exception caused by pending virtual -+ * abort. -+ */ -+ mrs r2, ELR_hyp -+ cmp r1, r2 -+ -+ /* -+ * Not equal, the pending virtual abort exception took place, the -+ * initial exception does not have any significance to be handled. -+ * Exit ASAP. -+ */ -+ bne return_from_trap -+ - mov pc, lr - - #define DEFINE_TRAP_ENTRY(trap) \ ---- a/xen/arch/arm/arm32/traps.c -+++ b/xen/arch/arm/arm32/traps.c -@@ -63,7 +63,10 @@ asmlinkage void do_trap_prefetch_abort(struct cpu_user_regs *regs) - - asmlinkage void do_trap_data_abort(struct cpu_user_regs *regs) - { -- do_unexpected_trap("Data Abort", regs); -+ if ( VABORT_GEN_BY_GUEST(regs) ) -+ do_trap_guest_error(regs); -+ else -+ do_unexpected_trap("Data Abort", regs); - } - - /* ---- a/xen/include/asm-arm/arm32/processor.h -+++ b/xen/include/asm-arm/arm32/processor.h -@@ -55,6 +55,17 @@ struct cpu_user_regs - - uint32_t pad1; /* Doubleword-align the user half of the frame */ - }; -+ -+/* Functions for pending virtual abort checking window. 
*/ -+void abort_guest_exit_start(void); -+void abort_guest_exit_end(void); -+ -+#define VABORT_GEN_BY_GUEST(r) \ -+( \ -+ ( (unsigned long)abort_guest_exit_start == (r)->pc ) || \ -+ ( (unsigned long)abort_guest_exit_end == (r)->pc ) \ -+) -+ - #endif - - /* Layout as used in assembly, with src/dest registers mixed in */ ---- a/xen/include/asm-arm/processor.h -+++ b/xen/include/asm-arm/processor.h -@@ -690,6 +690,8 @@ void vcpu_regs_user_to_hyp(struct vcpu *vcpu, - int call_smc(register_t function_id, register_t arg0, register_t arg1, - register_t arg2); - -+void do_trap_guest_error(struct cpu_user_regs *regs); -+ - #endif /* __ASSEMBLY__ */ - #endif /* __ASM_ARM_PROCESSOR_H */ - /* diff -Nru xen-4.6.0/debian/patches/xsa202-4.6.patch xen-4.6.5/debian/patches/xsa202-4.6.patch --- xen-4.6.0/debian/patches/xsa202-4.6.patch 2017-01-10 13:51:54.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa202-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,70 +0,0 @@ -From: Jan Beulich -Subject: x86: force EFLAGS.IF on when exiting to PV guests - -Guest kernels modifying instructions in the process of being emulated -for another of their vCPU-s may effect EFLAGS.IF to be cleared upon -next exiting to guest context, by converting the being emulated -instruction to CLI (at the right point in time). Prevent any such bad -effects by always forcing EFLAGS.IF on. And to cover hypothetical other -similar issues, also force EFLAGS.{IOPL,NT,VM} to zero. - -This is XSA-202. - -Signed-off-by: Jan Beulich - -Index: xen-4.6.0/xen/arch/x86/x86_64/entry.S -=================================================================== ---- xen-4.6.0.orig/xen/arch/x86/x86_64/entry.S -+++ xen-4.6.0/xen/arch/x86/x86_64/entry.S -@@ -40,28 +40,29 @@ restore_all_guest: - testw $TRAP_syscall,4(%rsp) - jz iret_exit_to_guest - -+ movq 24(%rsp),%r11 # RFLAGS -+ andq $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11 -+ orq $X86_EFLAGS_IF,%r11 -+ - /* Don't use SYSRET path if the return address is not canonical. */ - movq 8(%rsp),%rcx - sarq $47,%rcx - incl %ecx - cmpl $1,%ecx -- ja .Lforce_iret -+ movq 8(%rsp),%rcx # RIP -+ ja iret_exit_to_guest - - cmpw $FLAT_USER_CS32,16(%rsp)# CS -- movq 8(%rsp),%rcx # RIP -- movq 24(%rsp),%r11 # RFLAGS - movq 32(%rsp),%rsp # RSP - je 1f - sysretq - 1: sysretl - --.Lforce_iret: -- /* Mimic SYSRET behavior. */ -- movq 8(%rsp),%rcx # RIP -- movq 24(%rsp),%r11 # RFLAGS - ALIGN - /* No special register assumptions. */ - iret_exit_to_guest: -+ andl $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp) -+ orl $X86_EFLAGS_IF,24(%rsp) - addq $8,%rsp - .Lft0: iretq - -Index: xen-4.6.0/xen/arch/x86/x86_64/compat/entry.S -=================================================================== ---- xen-4.6.0.orig/xen/arch/x86/x86_64/compat/entry.S -+++ xen-4.6.0/xen/arch/x86/x86_64/compat/entry.S -@@ -174,6 +174,10 @@ compat_bad_hypercall: - /* %rbx: struct vcpu, interrupts disabled */ - ENTRY(compat_restore_all_guest) - ASSERT_INTERRUPTS_DISABLED -+ mov $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11d -+ and UREGS_eflags(%rsp),%r11d -+ or $X86_EFLAGS_IF,%r11 -+ mov %r11d,UREGS_eflags(%rsp) - RESTORE_ALL adj=8 compat=1 - .Lft0: iretq - diff -Nru xen-4.6.0/debian/patches/xsa203-4.7.patch xen-4.6.5/debian/patches/xsa203-4.7.patch --- xen-4.6.0/debian/patches/xsa203-4.7.patch 2017-01-10 13:53:41.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa203-4.7.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -From: Jan Beulich -Subject: x86/HVM: add missing NULL check before using VMFUNC hook - -This is XSA-203. 
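The XSA-203 fix is an instance of a generic defensive pattern: an ops-table hook that only some hardware provides may be NULL and must be checked before the indirect call. An illustrative, hypothetical reduction; the types and names below are not Xen's:

    #include <stddef.h>

    struct ops {
        int (*emulate_vmfunc)(void *regs);   /* absent on most hardware */
    };

    /* Fail safely instead of dereferencing a NULL hook; the patch below
     * returns X86EMUL_UNHANDLEABLE in the same situation. */
    static int call_vmfunc(const struct ops *ops, void *regs)
    {
        if (ops->emulate_vmfunc == NULL)
            return -1;
        return ops->emulate_vmfunc(regs);
    }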
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -1643,6 +1643,8 @@ static int hvmemul_vmfunc( - { - int rc; - -+ if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc ) -+ return X86EMUL_UNHANDLEABLE; - rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs); - if ( rc != X86EMUL_OKAY ) - hvmemul_inject_hw_exception(TRAP_invalid_op, 0, ctxt); diff -Nru xen-4.6.0/debian/patches/xsa204-4.7.patch xen-4.6.5/debian/patches/xsa204-4.7.patch --- xen-4.6.0/debian/patches/xsa204-4.7.patch 2017-01-10 13:54:04.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa204-4.7.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 +0,0 @@ -From: Andrew Cooper -Date: Sun, 18 Dec 2016 15:42:59 +0000 -Subject: [PATCH] x86/emul: Correct the handling of eflags with SYSCALL - -A singlestep #DB is determined by the resulting eflags value from the -execution of SYSCALL, not the original eflags value. - -By using the original eflags value, we negate the guest kernels attempt to -protect itself from a privilege escalation by masking TF. - -Introduce a tf boolean and have the SYSCALL emulation recalculate it -after the instruction is complete. - -This is XSA-204 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/x86_emulate/x86_emulate.c | 23 ++++++++++++++++++++--- - 1 file changed, 20 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index bca7045..abe442e 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1582,6 +1582,7 @@ x86_emulate( - union vex vex = {}; - unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; - bool_t lock_prefix = 0; -+ bool_t tf = !!(ctxt->regs->eflags & EFLG_TF); - int override_seg = -1, rc = X86EMUL_OKAY; - struct operand src = { .reg = REG_POISON }; - struct operand dst = { .reg = REG_POISON }; -@@ -3910,9 +3911,8 @@ x86_emulate( - } - - no_writeback: -- /* Inject #DB if single-step tracing was enabled at instruction start. */ -- if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) && -- (ops->inject_hw_exception != NULL) ) -+ /* Should a singlestep #DB be raised? */ -+ if ( tf && (rc == X86EMUL_OKAY) && (ops->inject_hw_exception != NULL) ) - rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION; - - /* Commit shadow register state. */ -@@ -4143,6 +4143,23 @@ x86_emulate( - (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) ) - goto done; - -+ /* -+ * SYSCALL (unlike most instructions) evaluates its singlestep action -+ * based on the resulting EFLG_TF, not the starting EFLG_TF. -+ * -+ * As the #DB is raised after the CPL change and before the OS can -+ * switch stack, it is a large risk for privilege escalation. -+ * -+ * 64bit kernels should mask EFLG_TF in MSR_FMASK to avoid any -+ * vulnerability. Running the #DB handler on an IST stack is also a -+ * mitigation. -+ * -+ * 32bit kernels have no ability to mask EFLG_TF at all. Their only -+ * mitigation is to use a task gate for handling #DB (or to not use -+ * enable EFER.SCE to start with). 
-+ */ -+ tf = !!(_regs.eflags & EFLG_TF); -+ - break; - } - diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0001-xenstored-apply-a-write-transaction-rate-limit.patch xen-4.6.5/debian/patches/xsa206-4.6-0001-xenstored-apply-a-write-transaction-rate-limit.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0001-xenstored-apply-a-write-transaction-rate-limit.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0001-xenstored-apply-a-write-transaction-rate-limit.patch 2017-05-09 12:52:32.000000000 +0000 @@ -0,0 +1,448 @@ +From 5aed8d6e20d1848f6818e649905df84e9cee34ae Mon Sep 17 00:00:00 2001 +From: Ian Jackson +Date: Sat, 18 Mar 2017 16:44:46 +0000 +Subject: [PATCH 01/23] xenstored: apply a write transaction rate limit + +This avoids a rogue client being able to stall another client (eg the +toolstack) indefinitely. + +This is XSA-206. + +Reported-by: Juergen Gross +Signed-off-by: Ian Jackson +--- + tools/xenstore/Makefile | 3 +- + tools/xenstore/xenstored_core.c | 9 ++ + tools/xenstore/xenstored_core.h | 6 + + tools/xenstore/xenstored_domain.c | 215 +++++++++++++++++++++++++++++++++ + tools/xenstore/xenstored_domain.h | 25 ++++ + tools/xenstore/xenstored_transaction.c | 5 + + 6 files changed, 262 insertions(+), 1 deletion(-) + +Index: xen-4.6.5/tools/xenstore/Makefile +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/Makefile ++++ xen-4.6.5/tools/xenstore/Makefile +@@ -32,6 +32,7 @@ XENSTORED_OBJS_$(CONFIG_FreeBSD) = xenst + XENSTORED_OBJS_$(CONFIG_MiniOS) = xenstored_minios.o + + XENSTORED_OBJS += $(XENSTORED_OBJS_y) ++LDLIBS_xenstored += -lrt + + ifneq ($(XENSTORE_STATIC_CLIENTS),y) + LIBXENSTORE := libxenstore.so +@@ -80,7 +81,7 @@ init-xenstore-domain: init-xenstore-doma + $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(call LDFLAGS_RPATH,../lib) -o $@ $(APPEND_LDFLAGS) + + xenstored: $(XENSTORED_OBJS) +- $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenctrl) $(SOCKET_LIBS) $(call LDFLAGS_RPATH,../lib) -o $@ $(APPEND_LDFLAGS) ++ $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenctrl) $(LDLIBS_xenstored) $(SOCKET_LIBS) $(call LDFLAGS_RPATH,../lib) -o $@ $(APPEND_LDFLAGS) + + xenstored.a: $(XENSTORED_OBJS) + $(AR) cr $@ $^ +Index: xen-4.6.5/tools/xenstore/xenstored_core.c +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/xenstored_core.c ++++ xen-4.6.5/tools/xenstore/xenstored_core.c +@@ -356,6 +356,7 @@ static void initialize_fds(int sock, int + int *ptimeout) + { + struct connection *conn; ++ struct wrl_timestampt now; + + if (fds) + memset(fds, 0, sizeof(struct pollfd) * current_array_size); +@@ -375,8 +376,11 @@ static void initialize_fds(int sock, int + xce_pollfd_idx = set_fd(xc_evtchn_fd(xce_handle), + POLLIN|POLLPRI); + ++ wrl_gettime_now(&now); ++ + list_for_each_entry(conn, &connections, list) { + if (conn->domain) { ++ wrl_check_timeout(conn->domain, now, ptimeout); + if (domain_can_read(conn) || + (domain_can_write(conn) && + !list_empty(&conn->out_list))) +@@ -809,6 +813,7 @@ static void delete_node_single(struct co + corrupt(conn, "Could not delete '%s'", node->name); + return; + } ++ + domain_entry_dec(conn, node); + } + +@@ -948,6 +953,7 @@ static void do_write(struct connection * + } + + add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, name, false); + send_ack(conn, XS_WRITE); + } +@@ -972,6 +978,7 @@ static void do_mkdir(struct connection * + return; + } + 
add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, name, false); + } + send_ack(conn, XS_MKDIR); +@@ -1097,6 +1104,7 @@ static void do_rm(struct connection *con + + if (_rm(conn, node, name)) { + add_change_node(conn->transaction, name, true); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, name, true); + send_ack(conn, XS_RM); + } +@@ -1172,6 +1180,7 @@ static void do_set_perms(struct connecti + } + + add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, name, false); + send_ack(conn, XS_SET_PERMS); + } +Index: xen-4.6.5/tools/xenstore/xenstored_core.h +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/xenstored_core.h ++++ xen-4.6.5/tools/xenstore/xenstored_core.h +@@ -30,6 +30,12 @@ + #include "list.h" + #include "tdb.h" + ++#define MIN(a, b) (((a) < (b))? (a) : (b)) ++ ++typedef int32_t wrl_creditt; ++#define WRL_CREDIT_MAX (1000*1000*1000) ++/* ^ satisfies non-overflow condition for wrl_xfer_credit */ ++ + struct buffered_data + { + struct list_head list; +Index: xen-4.6.5/tools/xenstore/xenstored_domain.c +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/xenstored_domain.c ++++ xen-4.6.5/tools/xenstore/xenstored_domain.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include "utils.h" + #include "talloc.h" +@@ -73,6 +74,10 @@ struct domain + + /* number of watch for this domain */ + int nbwatch; ++ ++ /* write rate limit */ ++ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ ++ struct wrl_timestampt wrl_timestamp; + }; + + static LIST_HEAD(domains); +@@ -205,6 +210,8 @@ static int destroy_domain(void *_domain) + + fire_watches(NULL, "@releaseDomain", false); + ++ wrl_domain_destroy(domain); ++ + return 0; + } + +@@ -252,6 +259,9 @@ void handle_event(void) + bool domain_can_read(struct connection *conn) + { + struct xenstore_domain_interface *intf = conn->domain->interface; ++ ++ if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) ++ return false; + return (intf->req_cons != intf->req_prod); + } + +@@ -283,6 +293,8 @@ static struct domain *new_domain(void *c + domain->domid = domid; + domain->path = talloc_domain_path(domain, domid); + ++ wrl_domain_new(domain); ++ + list_add(&domain->list, &domains); + talloc_set_destructor(domain, destroy_domain); + +@@ -746,6 +758,209 @@ int domain_watch(struct connection *conn + : 0; + } + ++static wrl_creditt wrl_config_writecost = WRL_FACTOR; ++static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR; ++static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR; ++static wrl_creditt wrl_config_gburst = WRL_GBURST * WRL_FACTOR; ++static wrl_creditt wrl_config_newdoms_dburst = ++ WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR; ++ ++long wrl_ntransactions; ++ ++static long wrl_ndomains; ++static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ ++ ++void wrl_gettime_now(struct wrl_timestampt *now_wt) ++{ ++ struct timespec now_ts; ++ int r; ++ ++ r = clock_gettime(CLOCK_MONOTONIC, &now_ts); ++ if (r) ++ barf_perror("Could not find time (clock_gettime failed)"); ++ ++ now_wt->sec = now_ts.tv_sec; ++ now_wt->msec = now_ts.tv_nsec / 1000000; ++} ++ ++static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor, ++ wrl_creditt *credit, wrl_creditt credit_ceil) ++ /* ++ * Transfers zero or more credit from "debit" to "credit". 
++ * Transfers as much as possible while maintaining ++ * debit >= debit_floor and credit <= credit_ceil. ++ * (If that's violated already, does nothing.) ++ * ++ * Sufficient conditions to avoid overflow, either of: ++ * |every argument| <= 0x3fffffff ++ * |every argument| <= 1E9 ++ * |every argument| <= WRL_CREDIT_MAX ++ * (And this condition is preserved.) ++ */ ++{ ++ wrl_creditt xfer = MIN( *debit - debit_floor, ++ credit_ceil - *credit ); ++ if (xfer > 0) { ++ *debit -= xfer; ++ *credit += xfer; ++ } ++} ++ ++void wrl_domain_new(struct domain *domain) ++{ ++ domain->wrl_credit = 0; ++ wrl_gettime_now(&domain->wrl_timestamp); ++ wrl_ndomains++; ++ /* Steal up to DBURST from the reserve */ ++ wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst, ++ &domain->wrl_credit, wrl_config_dburst); ++} ++ ++void wrl_domain_destroy(struct domain *domain) ++{ ++ wrl_ndomains--; ++ /* ++ * Don't bother recalculating domain's credit - this just ++ * means we don't give the reserve the ending domain's credit ++ * for time elapsed since last update. ++ */ ++ wrl_xfer_credit(&domain->wrl_credit, 0, ++ &wrl_reserve, wrl_config_dburst); ++} ++ ++void wrl_credit_update(struct domain *domain, struct wrl_timestampt now) ++{ ++ /* ++ * We want to calculate ++ * credit += (now - timestamp) * RATE / ndoms; ++ * But we want it to saturate, and to avoid floating point. ++ * To avoid rounding errors from constantly adding small ++ * amounts of credit, we only add credit for whole milliseconds. ++ */ ++ long seconds = now.sec - domain->wrl_timestamp.sec; ++ long milliseconds = now.msec - domain->wrl_timestamp.msec; ++ long msec; ++ int64_t denom, num; ++ wrl_creditt surplus; ++ ++ seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */ ++ msec = seconds * 1000 + milliseconds; ++ ++ if (msec < 0) ++ /* shouldn't happen with CLOCK_MONOTONIC */ ++ msec = 0; ++ ++ /* 32x32 -> 64 cannot overflow */ ++ denom = (int64_t)msec * wrl_config_rate; ++ num = (int64_t)wrl_ndomains * 1000; ++ /* denom / num <= 1E6 * wrl_config_rate, so with ++ reasonable wrl_config_rate, denom / num << 2^64 */ ++ ++ /* at last! 
*/ ++ domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num, ++ WRL_CREDIT_MAX ); ++ /* (maybe briefly violating the DBURST cap on wrl_credit) */ ++ ++ /* maybe take from the reserve to make us nonnegative */ ++ wrl_xfer_credit(&wrl_reserve, 0, ++ &domain->wrl_credit, 0); ++ ++ /* return any surplus (over DBURST) to the reserve */ ++ surplus = 0; ++ wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst, ++ &surplus, WRL_CREDIT_MAX); ++ wrl_xfer_credit(&surplus, 0, ++ &wrl_reserve, wrl_config_gburst); ++ /* surplus is now implicitly discarded */ ++ ++ domain->wrl_timestamp = now; ++ ++ trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve" ++ " %9ld discard\n", ++ domain->domid, ++ msec, ++ (long)domain->wrl_credit, (long)wrl_reserve, ++ (long)surplus); ++} ++ ++void wrl_check_timeout(struct domain *domain, ++ struct wrl_timestampt now, ++ int *ptimeout) ++{ ++ uint64_t num, denom; ++ int wakeup; ++ ++ wrl_credit_update(domain, now); ++ ++ if (domain->wrl_credit >= 0) ++ /* not blocked */ ++ return; ++ ++ if (!*ptimeout) ++ /* already decided on immediate wakeup, ++ so no need to calculate our timeout */ ++ return; ++ ++ /* calculate wakeup = now + -credit / (RATE / ndoms); */ ++ ++ /* credit cannot go more -ve than one transaction, ++ * so the first multiplication cannot overflow even 32-bit */ ++ num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains; ++ denom = wrl_config_rate; ++ ++ wakeup = MIN( num / denom /* uint64_t */, INT_MAX ); ++ if (*ptimeout==-1 || wakeup < *ptimeout) ++ *ptimeout = wakeup; ++ ++ trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n", ++ domain->domid, ++ (long)domain->wrl_credit, (long)wrl_reserve, ++ wakeup); ++} ++ ++void wrl_apply_debit_actual(struct domain *domain) ++{ ++ struct wrl_timestampt now; ++ ++ if (!domain) ++ /* sockets escape the write rate limit */ ++ return; ++ ++ wrl_gettime_now(&now); ++ wrl_credit_update(domain, now); ++ ++ domain->wrl_credit -= wrl_config_writecost; ++ trace("wrl: domain %u credit=%ld (reserve=%ld)\n", ++ domain->domid, ++ (long)domain->wrl_credit, (long)wrl_reserve); ++} ++ ++void wrl_apply_debit_direct(struct connection *conn) ++{ ++ if (!conn) ++ /* some writes are generated internally */ ++ return; ++ ++ if (conn->transaction) ++ /* these are accounted for when the transaction ends */ ++ return; ++ ++ if (!wrl_ntransactions) ++ /* we don't conflict with anyone */ ++ return; ++ ++ wrl_apply_debit_actual(conn->domain); ++} ++ ++void wrl_apply_debit_trans_commit(struct connection *conn) ++{ ++ if (wrl_ntransactions <= 1) ++ /* our own transaction appears in the counter */ ++ return; ++ ++ wrl_apply_debit_actual(conn->domain); ++} ++ + /* + * Local variables: + * c-file-style: "linux" +Index: xen-4.6.5/tools/xenstore/xenstored_domain.h +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/xenstored_domain.h ++++ xen-4.6.5/tools/xenstore/xenstored_domain.h +@@ -65,4 +65,29 @@ void domain_watch_inc(struct connection + void domain_watch_dec(struct connection *conn); + int domain_watch(struct connection *conn); + ++/* Write rate limiting */ ++ ++#define WRL_FACTOR 1000 /* for fixed-point arithmetic */ ++#define WRL_RATE 200 ++#define WRL_DBURST 10 ++#define WRL_GBURST 1000 ++#define WRL_NEWDOMS 5 ++ ++struct wrl_timestampt { ++ time_t sec; ++ int msec; ++}; ++ ++extern long wrl_ntransactions; ++ ++void wrl_gettime_now(struct wrl_timestampt *now_ts); ++void wrl_domain_new(struct domain *domain); ++void wrl_domain_destroy(struct domain *domain); 
++void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); ++void wrl_check_timeout(struct domain *domain, ++ struct wrl_timestampt now, ++ int *ptimeout); ++void wrl_apply_debit_direct(struct connection *conn); ++void wrl_apply_debit_trans_commit(struct connection *conn); ++ + #endif /* _XENSTORED_DOMAIN_H */ +Index: xen-4.6.5/tools/xenstore/xenstored_transaction.c +=================================================================== +--- xen-4.6.5.orig/tools/xenstore/xenstored_transaction.c ++++ xen-4.6.5/tools/xenstore/xenstored_transaction.c +@@ -116,6 +116,7 @@ static int destroy_transaction(void *_tr + { + struct transaction *trans = _transaction; + ++ wrl_ntransactions--; + trace_destroy(trans, "transaction"); + if (trans->tdb) + tdb_close(trans->tdb); +@@ -179,6 +180,7 @@ void do_transaction_start(struct connect + talloc_steal(conn, trans); + talloc_set_destructor(trans, destroy_transaction); + conn->transaction_started++; ++ wrl_ntransactions++; + + snprintf(id_str, sizeof(id_str), "%u", trans->id); + send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1); +@@ -213,6 +215,9 @@ void do_transaction_end(struct connectio + send_error(conn, EAGAIN); + return; + } ++ ++ wrl_apply_debit_trans_commit(conn); ++ + if (!replace_tdb(trans->tdb_name, trans->tdb)) { + send_error(conn, errno); + return; diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch xen-4.6.5/debian/patches/xsa206-4.6-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch 2017-05-09 12:51:18.000000000 +0000 @@ -0,0 +1,113 @@ +From 973b17021b43c825c03ca0619dbeb25d5360a38b Mon Sep 17 00:00:00 2001 +From: Ian Jackson +Date: Sat, 18 Mar 2017 16:45:27 +0000 +Subject: [PATCH 02/23] xenstored: Log when the write transaction rate limit + bites + +Reported-by: Juergen Gross +Signed-off-by: Ian Jackson +--- + tools/xenstore/xenstored_core.c | 1 + + tools/xenstore/xenstored_domain.c | 25 +++++++++++++++++++++++++ + tools/xenstore/xenstored_domain.h | 2 ++ + 3 files changed, 28 insertions(+) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 9dd06b1..0061af9 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -377,6 +377,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, + POLLIN|POLLPRI); + + wrl_gettime_now(&now); ++ wrl_log_periodic(now); + + list_for_each_entry(conn, &connections, list) { + if (conn->domain) { +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 3cf5c75..ac3d677 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include "utils.h" + #include "talloc.h" +@@ -78,6 +79,7 @@ struct domain + /* write rate limit */ + wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ + struct wrl_timestampt wrl_timestamp; ++ bool wrl_delay_logged; + }; + + static LIST_HEAD(domains); +@@ -769,6 +771,7 @@ long wrl_ntransactions; + + static long wrl_ndomains; + static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ ++static time_t wrl_log_last_warning; /* 0: no previous warning */ + + void wrl_gettime_now(struct wrl_timestampt *now_wt) + { +@@ 
-918,6 +921,9 @@ void wrl_check_timeout(struct domain *domain, + wakeup); + } + ++#define WRL_LOG(now, ...) \ ++ (syslog(LOG_WARNING, "write rate limit: " __VA_ARGS__)) ++ + void wrl_apply_debit_actual(struct domain *domain) + { + struct wrl_timestampt now; +@@ -933,6 +939,25 @@ void wrl_apply_debit_actual(struct domain *domain) + trace("wrl: domain %u credit=%ld (reserve=%ld)\n", + domain->domid, + (long)domain->wrl_credit, (long)wrl_reserve); ++ ++ if (domain->wrl_credit < 0) { ++ if (!domain->wrl_delay_logged++) { ++ WRL_LOG(now, "domain %ld is affected", ++ (long)domain->domid); ++ } else if (!wrl_log_last_warning) { ++ WRL_LOG(now, "rate limiting restarts"); ++ } ++ wrl_log_last_warning = now.sec; ++ } ++} ++ ++void wrl_log_periodic(struct wrl_timestampt now) ++{ ++ if (wrl_log_last_warning && ++ (now.sec - wrl_log_last_warning) > WRL_LOGEVERY) { ++ WRL_LOG(now, "not in force recently"); ++ wrl_log_last_warning = 0; ++ } + } + + void wrl_apply_debit_direct(struct connection *conn) +diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h +index bdc4044..2b963ed 100644 +--- a/tools/xenstore/xenstored_domain.h ++++ b/tools/xenstore/xenstored_domain.h +@@ -72,6 +72,7 @@ int domain_watch(struct connection *conn); + #define WRL_DBURST 10 + #define WRL_GBURST 1000 + #define WRL_NEWDOMS 5 ++#define WRL_LOGEVERY 120 /* seconds */ + + struct wrl_timestampt { + time_t sec; +@@ -87,6 +88,7 @@ void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); + void wrl_check_timeout(struct domain *domain, + struct wrl_timestampt now, + int *ptimeout); ++void wrl_log_periodic(struct wrl_timestampt now); + void wrl_apply_debit_direct(struct connection *conn); + void wrl_apply_debit_trans_commit(struct connection *conn); + +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0003-oxenstored-refactor-putting-response-on-wire.patch xen-4.6.5/debian/patches/xsa206-4.6-0003-oxenstored-refactor-putting-response-on-wire.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0003-oxenstored-refactor-putting-response-on-wire.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0003-oxenstored-refactor-putting-response-on-wire.patch 2017-05-09 12:51:18.000000000 +0000 @@ -0,0 +1,127 @@ +From 9cb3ad7a8a0749d8e8633b0ff56afc268e42dc13 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:27:23 +0000 +Subject: [PATCH 03/23] oxenstored: refactor putting response on wire + +Previously, the functions reply_{ack,data,data_or_ack} and input_handle_error +put the response on the wire by invoking Connection.send_{ack,reply,error}. + +Instead, these functions now return a value indicating what needs to be put on +the wire, and that action is done by a send_response function called +afterwards. + +This refactoring gives us a chance to store the value of the response, useful +for replaying transactions. 
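The heart of this refactor carries over from OCaml to any language: handlers return a response value instead of writing to the ring themselves, so the dispatcher can both transmit it and retain it for transaction replay. A rough C analogue of the new Packet.response type, purely illustrative:

    /* Tagged response value, mirroring oxenstored's
     *   type response = Ack of (unit -> unit) | Reply of string | Error of string
     */
    enum resp_kind { RESP_ACK, RESP_REPLY, RESP_ERROR };

    struct response {
        enum resp_kind kind;
        const char *payload;       /* reply data or error string; unused for ack */
        void (*after_ack)(void);   /* follow-up action once an ack is sent */
    };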
+ +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/Makefile | 1 + + tools/ocaml/xenstored/packet.ml | 4 ++++ + tools/ocaml/xenstored/process.ml | 34 ++++++++++++++++++++++++---------- + 3 files changed, 29 insertions(+), 10 deletions(-) + create mode 100644 tools/ocaml/xenstored/packet.ml + +diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile +index 59875f7..dce9e70 100644 +--- a/tools/ocaml/xenstored/Makefile ++++ b/tools/ocaml/xenstored/Makefile +@@ -36,6 +36,7 @@ OBJS = define \ + stdext \ + trie \ + config \ ++ packet \ + logging \ + quota \ + perms \ +diff --git a/tools/ocaml/xenstored/packet.ml b/tools/ocaml/xenstored/packet.ml +new file mode 100644 +index 0000000..c8ecfe5 +--- /dev/null ++++ b/tools/ocaml/xenstored/packet.ml +@@ -0,0 +1,4 @@ ++type response = ++ | Ack of (unit -> unit) (* function is the action to execute after sending the ack *) ++ | Reply of string ++ | Error of string +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index e827678..3377966 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -126,8 +126,7 @@ let do_watch con t rid domains cons data = + | _ -> raise Invalid_Cmd_Args + in + let watch = Connections.add_watch cons con node token in +- Connection.send_ack con (Transaction.get_id t) rid Xenbus.Xb.Op.Watch; +- Connection.fire_single_watch watch ++ Packet.Ack (fun () -> Connection.fire_single_watch watch) + + let do_unwatch con t domains cons data = + let (node, token) = +@@ -289,20 +288,32 @@ let do_set_target con t domains cons data = + | _ -> raise Invalid_Cmd_Args + + (*------------- Generic handling of ty ------------------*) ++let send_response ty con t rid response = ++ match response with ++ | Packet.Ack f -> ++ Connection.send_ack con (Transaction.get_id t) rid ty; ++ (* Now do any necessary follow-up actions *) ++ f () ++ | Packet.Reply ret -> ++ Connection.send_reply con (Transaction.get_id t) rid ty ret ++ | Packet.Error e -> ++ Connection.send_error con (Transaction.get_id t) rid e ++ + let reply_ack fct ty con t rid doms cons data = + fct con t doms cons data; +- Connection.send_ack con (Transaction.get_id t) rid ty; +- if Transaction.get_id t = Transaction.none then +- process_watch (Transaction.get_ops t) cons ++ Packet.Ack (fun () -> ++ if Transaction.get_id t = Transaction.none then ++ process_watch (Transaction.get_ops t) cons ++ ) + + let reply_data fct ty con t rid doms cons data = + let ret = fct con t doms cons data in +- Connection.send_reply con (Transaction.get_id t) rid ty ret ++ Packet.Reply ret + + let reply_data_or_ack fct ty con t rid doms cons data = + match fct con t doms cons data with +- | Some ret -> Connection.send_reply con (Transaction.get_id t) rid ty ret +- | None -> Connection.send_ack con (Transaction.get_id t) rid ty ++ | Some ret -> Packet.Reply ret ++ | None -> Packet.Ack (fun () -> ()) + + let reply_none fct ty con t rid doms cons data = + (* let the function reply *) +@@ -335,7 +346,7 @@ let function_of_type ty = + + let input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data = + let reply_error e = +- Connection.send_error con (Transaction.get_id t) rid e in ++ Packet.Error e in + try + fct ty con t rid doms cons data + with +@@ -368,7 +379,10 @@ let process_packet ~store ~cons ~doms ~con ~tid ~rid ~ty ~data = + else + Connection.get_transaction con tid + in +- 
input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data; ++ let response = input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data in ++ ++ (* Put the response on the wire *) ++ send_response ty con t rid response + with exn -> + error "process packet: %s" (Printexc.to_string exn); + Connection.send_error con tid rid "EIO" +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0004-oxenstored-remove-some-unused-parameters.patch xen-4.6.5/debian/patches/xsa206-4.6-0004-oxenstored-remove-some-unused-parameters.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0004-oxenstored-remove-some-unused-parameters.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0004-oxenstored-remove-some-unused-parameters.patch 2017-05-09 12:51:18.000000000 +0000 @@ -0,0 +1,71 @@ +From 5d641cb4019a22d0818709d6637825082e0f5c97 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:27:39 +0000 +Subject: [PATCH 04/23] oxenstored: remove some unused parameters + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/process.ml | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 3377966..7a73669 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -119,7 +119,7 @@ let do_getperms con t domains cons data = + let perms = Transaction.getperms t (Connection.get_perm con) path in + Perms.Node.to_string perms ^ "\000" + +-let do_watch con t rid domains cons data = ++let do_watch con t domains cons data = + let (node, token) = + match (split None '\000' data) with + | [node; token; ""] -> node, token +@@ -299,25 +299,25 @@ let send_response ty con t rid response = + | Packet.Error e -> + Connection.send_error con (Transaction.get_id t) rid e + +-let reply_ack fct ty con t rid doms cons data = ++let reply_ack fct con t doms cons data = + fct con t doms cons data; + Packet.Ack (fun () -> + if Transaction.get_id t = Transaction.none then + process_watch (Transaction.get_ops t) cons + ) + +-let reply_data fct ty con t rid doms cons data = ++let reply_data fct con t doms cons data = + let ret = fct con t doms cons data in + Packet.Reply ret + +-let reply_data_or_ack fct ty con t rid doms cons data = ++let reply_data_or_ack fct con t doms cons data = + match fct con t doms cons data with + | Some ret -> Packet.Reply ret + | None -> Packet.Ack (fun () -> ()) + +-let reply_none fct ty con t rid doms cons data = ++let reply_none fct con t doms cons data = + (* let the function reply *) +- fct con t rid doms cons data ++ fct con t doms cons data + + let function_of_type ty = + match ty with +@@ -348,7 +348,7 @@ let input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data = + let reply_error e = + Packet.Error e in + try +- fct ty con t rid doms cons data ++ fct con t doms cons data + with + | Define.Invalid_path -> reply_error "EINVAL" + | Define.Already_exist -> reply_error "EEXIST" +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0005-oxenstored-refactor-request-processing.patch xen-4.6.5/debian/patches/xsa206-4.6-0005-oxenstored-refactor-request-processing.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0005-oxenstored-refactor-request-processing.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0005-oxenstored-refactor-request-processing.patch 2017-05-09 
12:51:19.000000000 +0000 @@ -0,0 +1,94 @@ +From 03190ba306d478b4ea70535f4a19c32ad1cf63c2 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:27:50 +0000 +Subject: [PATCH 05/23] oxenstored: refactor request processing + +Encapsulate the request in a record that is passed from do_input to +process_packet and input_handle_error. + +This will be helpful when keeping track of the requests made as part of a +transaction. + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/packet.ml | 7 +++++++ + tools/ocaml/xenstored/process.ml | 15 ++++++++++----- + 2 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/packet.ml b/tools/ocaml/xenstored/packet.ml +index c8ecfe5..22cae1d 100644 +--- a/tools/ocaml/xenstored/packet.ml ++++ b/tools/ocaml/xenstored/packet.ml +@@ -1,3 +1,10 @@ ++type request = { ++ tid: int; ++ rid: int; ++ ty: Xenbus.Xb.Op.operation; ++ data: string; ++} ++ + type response = + | Ack of (unit -> unit) (* function is the action to execute after sending the ack *) + | Reply of string +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 7a73669..c92bec7 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -344,11 +344,11 @@ let function_of_type ty = + | Xenbus.Xb.Op.Invalid -> reply_ack do_error + | _ -> reply_ack do_error + +-let input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data = ++let input_handle_error ~cons ~doms ~fct ~con ~t ~req = + let reply_error e = + Packet.Error e in + try +- fct con t doms cons data ++ fct con t doms cons req.Packet.data + with + | Define.Invalid_path -> reply_error "EINVAL" + | Define.Already_exist -> reply_error "EEXIST" +@@ -370,7 +370,10 @@ let input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data = + (** + * Nothrow guarantee. 
+ *) +-let process_packet ~store ~cons ~doms ~con ~tid ~rid ~ty ~data = ++let process_packet ~store ~cons ~doms ~con ~req = ++ let ty = req.Packet.ty in ++ let tid = req.Packet.tid in ++ let rid = req.Packet.rid in + try + let fct = function_of_type ty in + let t = +@@ -379,7 +382,7 @@ let process_packet ~store ~cons ~doms ~con ~tid ~rid ~ty ~data = + else + Connection.get_transaction con tid + in +- let response = input_handle_error ~cons ~doms ~fct ~ty ~con ~t ~rid ~data in ++ let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in + + (* Put the response on the wire *) + send_response ty con t rid response +@@ -412,11 +415,13 @@ let do_input store cons doms con = + if newpacket then ( + let packet = Connection.pop_in con in + let tid, rid, ty, data = Xenbus.Xb.Packet.unpack packet in ++ let req = {Packet.tid; Packet.rid; Packet.ty; Packet.data} in ++ + (* As we don't log IO, do not call an unnecessary sanitize_data + info "[%s] -> [%d] %s \"%s\"" + (Connection.get_domstr con) tid + (Xenbus.Xb.Op.to_string ty) (sanitize_data data); *) +- process_packet ~store ~cons ~doms ~con ~tid ~rid ~ty ~data; ++ process_packet ~store ~cons ~doms ~con ~req; + write_access_log ~ty ~tid ~con ~data; + Connection.incr_ops con; + ) +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0006-oxenstored-keep-track-of-each-transaction-s-operatio.patch xen-4.6.5/debian/patches/xsa206-4.6-0006-oxenstored-keep-track-of-each-transaction-s-operatio.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0006-oxenstored-keep-track-of-each-transaction-s-operatio.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0006-oxenstored-keep-track-of-each-transaction-s-operatio.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,160 @@ +From 7e956305bb7990d39fd57a05820dcbf0e1ff3c38 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:27:58 +0000 +Subject: [PATCH 06/23] oxenstored: keep track of each transaction's operations + +A list of (request, response) pairs from the operations performed within the +transaction will be useful to support transaction replay. + +Since this consumes memory, the number of requests per transaction must not be +left unbounded. Hence a new quota for this is introduced. This quota, configured +via the configuration key 'quota-maxrequests', limits the size of transactions +initiated by domUs. + +After the maximum number of requests has been exhausted, any further requests +will result in EQUOTA errors. The client may then choose to end the transaction; +a successful commit will result in the retention of only the prior requests. 
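+
+In outline, the new check behaves like this stand-alone OCaml sketch (an
+illustration of the rule only, not the patch's code; max_requests and
+Limit_reached are stand-ins for the real Define.maxrequests and
+Quota.Limit_reached):
+
+    (* Per-transaction request quota: dom0 is exempt, a negative limit
+       disables the check, and once the limit is reached any further
+       request raises, which the caller maps to an EQUOTA error. *)
+    exception Limit_reached
+
+    let max_requests = ref 1024 (* mirrors quota-maxrequests *)
+
+    let add_operation ~is_dom0 ops op =
+        if !max_requests >= 0
+        && not is_dom0
+        && List.length ops >= !max_requests
+        then raise Limit_reached;
+        op :: ops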
+ +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/define.ml | 1 + + tools/ocaml/xenstored/oxenstored.conf | 1 + + tools/ocaml/xenstored/process.ml | 13 +++++++++++-- + tools/ocaml/xenstored/transaction.ml | 21 +++++++++++++++------ + tools/ocaml/xenstored/xenstored.ml | 1 + + 5 files changed, 29 insertions(+), 8 deletions(-) + +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index 89a6aac..d60861c 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -27,6 +27,7 @@ let default_config_dir = "/etc/xen" + + let maxwatch = ref (50) + let maxtransaction = ref (20) ++let maxrequests = ref (-1) (* maximum requests per transaction *) + + let domid_self = 0x7FF0 + +diff --git a/tools/ocaml/xenstored/oxenstored.conf b/tools/ocaml/xenstored/oxenstored.conf +index dd20eda..ac60f49 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf ++++ b/tools/ocaml/xenstored/oxenstored.conf +@@ -18,6 +18,7 @@ quota-maxentity = 1000 + quota-maxsize = 2048 + quota-maxwatch = 100 + quota-transaction = 10 ++quota-maxrequests = 1024 + + # Activate filed base backend + persistent = false +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index c92bec7..758ade1 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -155,7 +155,7 @@ let do_transaction_end con t domains cons data = + if not success then + raise Transaction_again; + if commit then +- process_watch (List.rev (Transaction.get_ops t)) cons ++ process_watch (List.rev (Transaction.get_paths t)) cons + + let do_introduce con t domains cons data = + if not (Connection.is_dom0 con) +@@ -303,7 +303,7 @@ let reply_ack fct con t doms cons data = + fct con t doms cons data; + Packet.Ack (fun () -> + if Transaction.get_id t = Transaction.none then +- process_watch (Transaction.get_ops t) cons ++ process_watch (Transaction.get_paths t) cons + ) + + let reply_data fct con t doms cons data = +@@ -384,6 +384,15 @@ let process_packet ~store ~cons ~doms ~con ~req = + in + let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in + ++ let response = try ++ if tid <> Transaction.none then ++ (* Remember the request and response for this operation in case we need to replay the transaction *) ++ Transaction.add_operation ~perm:(Connection.get_perm con) t req response; ++ response ++ with Quota.Limit_reached -> ++ Packet.Error "EQUOTA" ++ in ++ + (* Put the response on the wire *) + send_response ty con t rid response + with exn -> +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 77de4e8..6b37fc2 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -75,7 +75,8 @@ type t = { + ty: ty; + store: Store.t; + quota: Quota.t; +- mutable ops: (Xenbus.Xb.Op.operation * Store.Path.t) list; ++ mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; ++ mutable operations: (Packet.request * Packet.response) list; + mutable read_lowpath: Store.Path.t option; + mutable write_lowpath: Store.Path.t option; + } +@@ -86,16 +87,24 @@ let make id store = + ty = ty; + store = if id = none then store else Store.copy store; + quota = Quota.copy store.Store.quota; +- ops = []; ++ paths = []; ++ operations = []; + read_lowpath = None; + write_lowpath = None; + } + + let get_id t = match t.ty with No -> none | Full (id, _, _) -> 
id
+ let get_store t = t.store
+-let get_ops t = t.ops
+-
+-let add_wop t ty path = t.ops <- (ty, path) :: t.ops
++let get_paths t = t.paths
++
++let add_wop t ty path = t.paths <- (ty, path) :: t.paths
++let add_operation ~perm t request response =
++ if !Define.maxrequests >= 0
++ && not (Perms.Connection.is_dom0 perm)
++ && List.length t.operations >= !Define.maxrequests
++ then raise Quota.Limit_reached;
++ t.operations <- (request, response) :: t.operations
++let get_operations t = List.rev t.operations
+ let set_read_lowpath t path = t.read_lowpath <- get_lowest path t.read_lowpath
+ let set_write_lowpath t path = t.write_lowpath <- get_lowest path t.write_lowpath
+
+@@ -141,7 +150,7 @@ let getperms t perm path =
+ r
+
+ let commit ~con t =
+- let has_write_ops = List.length t.ops > 0 in
++ let has_write_ops = List.length t.paths > 0 in
+ let has_coalesced = ref false in
+ let has_commited =
+ match t.ty with
+diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
+index 42b8183..7d3df43 100644
+--- a/tools/ocaml/xenstored/xenstored.ml
++++ b/tools/ocaml/xenstored/xenstored.ml
+@@ -95,6 +95,7 @@ let parse_config filename =
+ ("quota-transaction", Config.Set_int Define.maxtransaction);
+ ("quota-maxentity", Config.Set_int Quota.maxent);
+ ("quota-maxsize", Config.Set_int Quota.maxsize);
++ ("quota-maxrequests", Config.Set_int Define.maxrequests);
+ ("test-eagain", Config.Set_bool Transaction.test_eagain);
+ ("persistent", Config.Set_bool Disk.enable);
+ ("xenstored-log-file", Config.String Logging.set_xenstored_log_destination);
+-- 
+2.1.4
+
diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0007-oxenstored-move-functions-that-process-simple-operat.patch xen-4.6.5/debian/patches/xsa206-4.6-0007-oxenstored-move-functions-that-process-simple-operat.patch
--- xen-4.6.0/debian/patches/xsa206-4.6-0007-oxenstored-move-functions-that-process-simple-operat.patch 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/debian/patches/xsa206-4.6-0007-oxenstored-move-functions-that-process-simple-operat.patch 2017-05-09 12:51:19.000000000 +0000
@@ -0,0 +1,286 @@
+From 9d62d32ee9813f5b340eb71b8e6ccd8cce45404b Mon Sep 17 00:00:00 2001
+From: Jonathan Davies
+Date: Thu, 23 Mar 2017 16:28:08 +0000
+Subject: [PATCH 07/23] oxenstored: move functions that process simple
+ operations
+
+Separate the functions which process operations that can be done as part of a
+transaction. Specifically, these operations are: read, write, rm, getperms,
+setperms, getdomainpath, directory, mkdir.
+
+Also split function_of_type into two functions: one for processing the simple
+operations and one for processing the rest.
+
+This will help enable replay of transactions, by allowing us to invoke the
+functions that process the simple operations as part of the processing of
+transaction_end.
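+
+The shape of the split is easiest to see in a toy model (a self-contained
+sketch, not the patch's code: the real dispatchers return the reply_*
+handlers shown in the hunks below, and the subsequent replay patch calls
+only the simple-op dispatcher):
+
+    (* 'Simple' operations are legal inside a transaction (and hence
+       inside a replay); control operations are not, so the simple-op
+       dispatcher rejects them outright. *)
+    type op = Read | Write | Mkdir | Rm | Watch | Transaction_start
+
+    let handle_simple_op = function
+        | Read -> "reply_data do_read"
+        | Write -> "reply_ack do_write"
+        | Mkdir -> "reply_ack do_mkdir"
+        | Rm -> "reply_ack do_rm"
+        | _ -> invalid_arg "not a simple operation"
+
+    let handle_any_op = function
+        | Watch -> "reply_none do_watch"
+        | Transaction_start -> "reply_data do_transaction_start"
+        | op -> handle_simple_op op (* everything else falls through *)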
+ +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/process.ml | 223 +++++++++++++++++++++------------------ + 1 file changed, 121 insertions(+), 102 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 758ade1..39ae71b 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -119,94 +119,6 @@ let do_getperms con t domains cons data = + let perms = Transaction.getperms t (Connection.get_perm con) path in + Perms.Node.to_string perms ^ "\000" + +-let do_watch con t domains cons data = +- let (node, token) = +- match (split None '\000' data) with +- | [node; token; ""] -> node, token +- | _ -> raise Invalid_Cmd_Args +- in +- let watch = Connections.add_watch cons con node token in +- Packet.Ack (fun () -> Connection.fire_single_watch watch) +- +-let do_unwatch con t domains cons data = +- let (node, token) = +- match (split None '\000' data) with +- | [node; token; ""] -> node, token +- | _ -> raise Invalid_Cmd_Args +- in +- Connections.del_watch cons con node token +- +-let do_transaction_start con t domains cons data = +- if Transaction.get_id t <> Transaction.none then +- raise Transaction_nested; +- let store = Transaction.get_store t in +- string_of_int (Connection.start_transaction con store) ^ "\000" +- +-let do_transaction_end con t domains cons data = +- let commit = +- match (split None '\000' data) with +- | "T" :: _ -> true +- | "F" :: _ -> false +- | x :: _ -> raise (Invalid_argument x) +- | _ -> raise Invalid_Cmd_Args +- in +- let success = +- Connection.end_transaction con (Transaction.get_id t) commit in +- if not success then +- raise Transaction_again; +- if commit then +- process_watch (List.rev (Transaction.get_paths t)) cons +- +-let do_introduce con t domains cons data = +- if not (Connection.is_dom0 con) +- then raise Define.Permission_denied; +- let (domid, mfn, port) = +- match (split None '\000' data) with +- | domid :: mfn :: port :: _ -> +- int_of_string domid, Nativeint.of_string mfn, int_of_string port +- | _ -> raise Invalid_Cmd_Args; +- in +- let dom = +- if Domains.exist domains domid then +- Domains.find domains domid +- else try +- let ndom = Xenctrl.with_intf (fun xc -> +- Domains.create xc domains domid mfn port) in +- Connections.add_domain cons ndom; +- Connections.fire_spec_watches cons "@introduceDomain"; +- ndom +- with _ -> raise Invalid_Cmd_Args +- in +- if (Domain.get_remote_port dom) <> port || (Domain.get_mfn dom) <> mfn then +- raise Domain_not_match +- +-let do_release con t domains cons data = +- if not (Connection.is_dom0 con) +- then raise Define.Permission_denied; +- let domid = +- match (split None '\000' data) with +- | [domid;""] -> int_of_string domid +- | _ -> raise Invalid_Cmd_Args +- in +- let fire_spec_watches = Domains.exist domains domid in +- Domains.del domains domid; +- Connections.del_domain cons domid; +- if fire_spec_watches +- then Connections.fire_spec_watches cons "@releaseDomain" +- else raise Invalid_Cmd_Args +- +-let do_resume con t domains cons data = +- if not (Connection.is_dom0 con) +- then raise Define.Permission_denied; +- let domid = +- match (split None '\000' data) with +- | domid :: _ -> int_of_string domid +- | _ -> raise Invalid_Cmd_Args +- in +- if Domains.exist domains domid +- then Domains.resume domains domid +- else raise Invalid_Cmd_Args +- + let do_getdomainpath con t domains 
cons data = + let domid = + match (split None '\000' data) with +@@ -319,29 +231,31 @@ let reply_none fct con t doms cons data = + (* let the function reply *) + fct con t doms cons data + +-let function_of_type ty = ++(* Functions for 'simple' operations that cannot be part of a transaction *) ++let function_of_type_simple_op ty = + match ty with +- | Xenbus.Xb.Op.Debug -> reply_data_or_ack do_debug ++ | Xenbus.Xb.Op.Debug ++ | Xenbus.Xb.Op.Watch ++ | Xenbus.Xb.Op.Unwatch ++ | Xenbus.Xb.Op.Transaction_start ++ | Xenbus.Xb.Op.Transaction_end ++ | Xenbus.Xb.Op.Introduce ++ | Xenbus.Xb.Op.Release ++ | Xenbus.Xb.Op.Isintroduced ++ | Xenbus.Xb.Op.Resume ++ | Xenbus.Xb.Op.Set_target ++ | Xenbus.Xb.Op.Restrict ++ | Xenbus.Xb.Op.Reset_watches ++ | Xenbus.Xb.Op.Invalid -> error "called function_of_type_simple_op on operation %s" (Xenbus.Xb.Op.to_string ty); ++ raise (Invalid_argument (Xenbus.Xb.Op.to_string ty)) + | Xenbus.Xb.Op.Directory -> reply_data do_directory + | Xenbus.Xb.Op.Read -> reply_data do_read + | Xenbus.Xb.Op.Getperms -> reply_data do_getperms +- | Xenbus.Xb.Op.Watch -> reply_none do_watch +- | Xenbus.Xb.Op.Unwatch -> reply_ack do_unwatch +- | Xenbus.Xb.Op.Transaction_start -> reply_data do_transaction_start +- | Xenbus.Xb.Op.Transaction_end -> reply_ack do_transaction_end +- | Xenbus.Xb.Op.Introduce -> reply_ack do_introduce +- | Xenbus.Xb.Op.Release -> reply_ack do_release + | Xenbus.Xb.Op.Getdomainpath -> reply_data do_getdomainpath + | Xenbus.Xb.Op.Write -> reply_ack do_write + | Xenbus.Xb.Op.Mkdir -> reply_ack do_mkdir + | Xenbus.Xb.Op.Rm -> reply_ack do_rm + | Xenbus.Xb.Op.Setperms -> reply_ack do_setperms +- | Xenbus.Xb.Op.Isintroduced -> reply_data do_isintroduced +- | Xenbus.Xb.Op.Resume -> reply_ack do_resume +- | Xenbus.Xb.Op.Set_target -> reply_ack do_set_target +- | Xenbus.Xb.Op.Restrict -> reply_ack do_restrict +- | Xenbus.Xb.Op.Reset_watches -> reply_ack do_reset_watches +- | Xenbus.Xb.Op.Invalid -> reply_ack do_error + | _ -> reply_ack do_error + + let input_handle_error ~cons ~doms ~fct ~con ~t ~req = +@@ -367,6 +281,111 @@ let input_handle_error ~cons ~doms ~fct ~con ~t ~req = + | (Failure "int_of_string") -> reply_error "EINVAL" + | Define.Unknown_operation -> reply_error "ENOSYS" + ++let do_watch con t domains cons data = ++ let (node, token) = ++ match (split None '\000' data) with ++ | [node; token; ""] -> node, token ++ | _ -> raise Invalid_Cmd_Args ++ in ++ let watch = Connections.add_watch cons con node token in ++ Packet.Ack (fun () -> Connection.fire_single_watch watch) ++ ++let do_unwatch con t domains cons data = ++ let (node, token) = ++ match (split None '\000' data) with ++ | [node; token; ""] -> node, token ++ | _ -> raise Invalid_Cmd_Args ++ in ++ Connections.del_watch cons con node token ++ ++let do_transaction_start con t domains cons data = ++ if Transaction.get_id t <> Transaction.none then ++ raise Transaction_nested; ++ let store = Transaction.get_store t in ++ string_of_int (Connection.start_transaction con store) ^ "\000" ++ ++let do_transaction_end con t domains cons data = ++ let commit = ++ match (split None '\000' data) with ++ | "T" :: _ -> true ++ | "F" :: _ -> false ++ | x :: _ -> raise (Invalid_argument x) ++ | _ -> raise Invalid_Cmd_Args ++ in ++ let success = ++ Connection.end_transaction con (Transaction.get_id t) commit in ++ if not success then ++ raise Transaction_again; ++ if commit then ++ process_watch (List.rev (Transaction.get_paths t)) cons ++ ++let do_introduce con t domains cons data = ++ if not (Connection.is_dom0 
con) ++ then raise Define.Permission_denied; ++ let (domid, mfn, port) = ++ match (split None '\000' data) with ++ | domid :: mfn :: port :: _ -> ++ int_of_string domid, Nativeint.of_string mfn, int_of_string port ++ | _ -> raise Invalid_Cmd_Args; ++ in ++ let dom = ++ if Domains.exist domains domid then ++ Domains.find domains domid ++ else try ++ let ndom = Xenctrl.with_intf (fun xc -> ++ Domains.create xc domains domid mfn port) in ++ Connections.add_domain cons ndom; ++ Connections.fire_spec_watches cons "@introduceDomain"; ++ ndom ++ with _ -> raise Invalid_Cmd_Args ++ in ++ if (Domain.get_remote_port dom) <> port || (Domain.get_mfn dom) <> mfn then ++ raise Domain_not_match ++ ++let do_release con t domains cons data = ++ if not (Connection.is_dom0 con) ++ then raise Define.Permission_denied; ++ let domid = ++ match (split None '\000' data) with ++ | [domid;""] -> int_of_string domid ++ | _ -> raise Invalid_Cmd_Args ++ in ++ let fire_spec_watches = Domains.exist domains domid in ++ Domains.del domains domid; ++ Connections.del_domain cons domid; ++ if fire_spec_watches ++ then Connections.fire_spec_watches cons "@releaseDomain" ++ else raise Invalid_Cmd_Args ++ ++let do_resume con t domains cons data = ++ if not (Connection.is_dom0 con) ++ then raise Define.Permission_denied; ++ let domid = ++ match (split None '\000' data) with ++ | domid :: _ -> int_of_string domid ++ | _ -> raise Invalid_Cmd_Args ++ in ++ if Domains.exist domains domid ++ then Domains.resume domains domid ++ else raise Invalid_Cmd_Args ++ ++let function_of_type ty = ++ match ty with ++ | Xenbus.Xb.Op.Debug -> reply_data_or_ack do_debug ++ | Xenbus.Xb.Op.Watch -> reply_none do_watch ++ | Xenbus.Xb.Op.Unwatch -> reply_ack do_unwatch ++ | Xenbus.Xb.Op.Transaction_start -> reply_data do_transaction_start ++ | Xenbus.Xb.Op.Transaction_end -> reply_ack do_transaction_end ++ | Xenbus.Xb.Op.Introduce -> reply_ack do_introduce ++ | Xenbus.Xb.Op.Release -> reply_ack do_release ++ | Xenbus.Xb.Op.Isintroduced -> reply_data do_isintroduced ++ | Xenbus.Xb.Op.Resume -> reply_ack do_resume ++ | Xenbus.Xb.Op.Set_target -> reply_ack do_set_target ++ | Xenbus.Xb.Op.Restrict -> reply_ack do_restrict ++ | Xenbus.Xb.Op.Reset_watches -> reply_ack do_reset_watches ++ | Xenbus.Xb.Op.Invalid -> reply_ack do_error ++ | _ -> function_of_type_simple_op ty ++ + (** + * Nothrow guarantee. + *) +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0008-oxenstored-replay-transaction-upon-conflict.patch xen-4.6.5/debian/patches/xsa206-4.6-0008-oxenstored-replay-transaction-upon-conflict.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0008-oxenstored-replay-transaction-upon-conflict.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0008-oxenstored-replay-transaction-upon-conflict.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,137 @@ +From 5c58daab885c0ab19200df95773e309a950154ad Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:28:19 +0000 +Subject: [PATCH 08/23] oxenstored: replay transaction upon conflict + +The existing transaction merge algorithm keeps track of the least upper bound +(longest common prefix) of all the nodes which have been read and written, and +will re-combine two stores which have disjoint upper bounds. This works well for +small transactions but causes unnecessary conflicts for ones that span a large +subtree, such as the following ones used by the xapi toolstack: + + * VM start: creates /vm/... /vss/... /local/domain/... 
+ The least upper bound of this transaction is / and so all + these transactions conflict with everything. + + * Device hotplug: creates /local/domain/0/... /local/domain/n/... + The least upper bound of this transaction is /local/domain so + all these transactions conflict with each other. + +If the existing merge algorithm cannot merge and commit, we attempt +a /replay/ of the failed transaction against the new store. + +When we replay the requests we check whether the response sent to the client is +the same as during the first attempt at the transaction. If the responses are +all the same then the transaction replay can be committed. If any differ then +the transaction replay must be aborted and the client must retry. + +This algorithm uses the intuition that the transactions made by the toolstack +are designed to be for separate domains, and should fundamentally not conflict +in the sense that they don't read or write any shared keys. By replaying the +transaction on the server side we do what the client would have to do anyway, +only we can do it quickly without allowing any other requests to interfere. + +Performing 300 parallel simulated VM start and shutdowns without this code: + +300 parallel starts and shutdowns: 268.92 + +Performing 300 parallel simulated VM start and shutdowns with this code: + +300 parallel starts and shutdowns: 3.80 + +Reported-by: Juergen Gross +Signed-off-by: Dave Scott +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/connection.ml | 5 ++++- + tools/ocaml/xenstored/packet.ml | 5 +++++ + tools/ocaml/xenstored/process.ml | 33 +++++++++++++++++++++++++++++++++ + 3 files changed, 42 insertions(+), 1 deletion(-) + +diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml +index 0a2c481..b18336f 100644 +--- a/tools/ocaml/xenstored/connection.ml ++++ b/tools/ocaml/xenstored/connection.ml +@@ -233,7 +233,10 @@ let end_transaction con tid commit = + let trans = Hashtbl.find con.transactions tid in + Hashtbl.remove con.transactions tid; + Logging.end_transaction ~tid ~con:(get_domstr con); +- if commit then Transaction.commit ~con:(get_domstr con) trans else true ++ match commit with ++ | None -> true ++ | Some transaction_replay_f -> ++ Transaction.commit ~con:(get_domstr con) trans || transaction_replay_f con trans + + let get_transaction con tid = + Hashtbl.find con.transactions tid +diff --git a/tools/ocaml/xenstored/packet.ml b/tools/ocaml/xenstored/packet.ml +index 22cae1d..aeae0a4 100644 +--- a/tools/ocaml/xenstored/packet.ml ++++ b/tools/ocaml/xenstored/packet.ml +@@ -9,3 +9,8 @@ type response = + | Ack of (unit -> unit) (* function is the action to execute after sending the ack *) + | Reply of string + | Error of string ++ ++let response_equal a b = ++ match (a, b) with ++ | (Ack _, Ack _) -> true (* just consider the response, not the post-response action *) ++ | (x, y) -> x = y +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 39ae71b..6d1f551 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -281,6 +281,38 @@ let input_handle_error ~cons ~doms ~fct ~con ~t ~req = + | (Failure "int_of_string") -> reply_error "EINVAL" + | Define.Unknown_operation -> reply_error "ENOSYS" + ++(* Replay a stored transaction against a fresh store, check the responses are ++ all equivalent: if so, commit the transaction. 
Otherwise send the abort to ++ the client. *) ++let transaction_replay c t doms cons = ++ match t.Transaction.ty with ++ | Transaction.No -> ++ error "attempted to replay a non-full transaction"; ++ false ++ | Transaction.Full(id, oldroot, cstore) -> ++ let tid = Connection.start_transaction c cstore in ++ let new_t = Transaction.make tid cstore in ++ let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in ++ let perform_exn (request, response) = ++ let fct = function_of_type_simple_op request.Packet.ty in ++ let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in ++ if not(Packet.response_equal response response') then raise Transaction_again in ++ finally ++ (fun () -> ++ try ++ Logging.start_transaction ~con ~tid; ++ List.iter perform_exn (Transaction.get_operations t); ++ Logging.end_transaction ~con ~tid; ++ ++ Transaction.commit ~con new_t ++ with e -> ++ info "transaction_replay %d caught: %s" tid (Printexc.to_string e); ++ false ++ ) ++ (fun () -> ++ Connection.end_transaction c tid None ++ ) ++ + let do_watch con t domains cons data = + let (node, token) = + match (split None '\000' data) with +@@ -313,6 +345,7 @@ let do_transaction_end con t domains cons data = + | _ -> raise Invalid_Cmd_Args + in + let success = ++ let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in + Connection.end_transaction con (Transaction.get_id t) commit in + if not success then + raise Transaction_again; +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0009-oxenstored-log-request-and-response-during-transacti.patch xen-4.6.5/debian/patches/xsa206-4.6-0009-oxenstored-log-request-and-response-during-transacti.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0009-oxenstored-log-request-and-response-during-transacti.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0009-oxenstored-log-request-and-response-during-transacti.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,87 @@ +From 4af91642a5e39270d4ff0e029fc9dce89180b8fe Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:28:34 +0000 +Subject: [PATCH 09/23] oxenstored: log request and response during transaction + replay + +During a transaction replay, the replayed requests and the new responses are +logged in the same way as the original requests and the original responses. 
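+
+Together with the response_equal helper introduced by the previous patch, the
+replay-and-compare step reduces to the following self-contained model
+(replay_one is a hypothetical stand-in for re-running one request through
+input_handle_error with the logging described above):
+
+    (* Responses compare equal up to the post-ack action; one mismatch
+       aborts the whole replay, surfacing to the client as EAGAIN. *)
+    type response = Ack of (unit -> unit) | Reply of string | Error of string
+
+    let response_equal a b =
+        match (a, b) with
+        | (Ack _, Ack _) -> true (* ignore the post-response action *)
+        | (x, y) -> x = y
+
+    exception Transaction_again
+
+    let replay_all ~replay_one recorded =
+        (* recorded: (request, original_response) pairs, oldest first *)
+        List.iter (fun (req, original) ->
+            let fresh = replay_one req in
+            if not (response_equal original fresh) then raise Transaction_again)
+            recorded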
+ +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Andrew Cooper +Reviewed-by: Jon Ludlam +Reviewed-by: Euan Harris +Acked-by: David Scott +--- + tools/ocaml/xenstored/process.ml | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 6d1f551..fb5fdaf 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -281,6 +281,18 @@ let input_handle_error ~cons ~doms ~fct ~con ~t ~req = + | (Failure "int_of_string") -> reply_error "EINVAL" + | Define.Unknown_operation -> reply_error "ENOSYS" + ++let write_access_log ~ty ~tid ~con ~data = ++ Logging.xb_op ~ty ~tid ~con data ++ ++let write_answer_log ~ty ~tid ~con ~data = ++ Logging.xb_answer ~ty ~tid ~con data ++ ++let write_response_log ~ty ~tid ~con ~response = ++ match response with ++ | Packet.Ack _ -> write_answer_log ~ty ~tid ~con ~data:"" ++ | Packet.Reply x -> write_answer_log ~ty ~tid ~con ~data:x ++ | Packet.Error e -> write_answer_log ~ty:(Xenbus.Xb.Op.Error) ~tid ~con ~data:e ++ + (* Replay a stored transaction against a fresh store, check the responses are + all equivalent: if so, commit the transaction. Otherwise send the abort to + the client. *) +@@ -294,8 +306,10 @@ let transaction_replay c t doms cons = + let new_t = Transaction.make tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in + let perform_exn (request, response) = ++ write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; + let fct = function_of_type_simple_op request.Packet.ty in + let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in ++ write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response'; + if not(Packet.response_equal response response') then raise Transaction_again in + finally + (fun () -> +@@ -451,12 +465,6 @@ let process_packet ~store ~cons ~doms ~con ~req = + error "process packet: %s" (Printexc.to_string exn); + Connection.send_error con tid rid "EIO" + +-let write_access_log ~ty ~tid ~con ~data = +- Logging.xb_op ~ty ~tid ~con:(Connection.get_domstr con) data +- +-let write_answer_log ~ty ~tid ~con ~data = +- Logging.xb_answer ~ty ~tid ~con:(Connection.get_domstr con) data +- + let do_input store cons doms con = + let newpacket = + try +@@ -483,7 +491,7 @@ let do_input store cons doms con = + (Connection.get_domstr con) tid + (Xenbus.Xb.Op.to_string ty) (sanitize_data data); *) + process_packet ~store ~cons ~doms ~con ~req; +- write_access_log ~ty ~tid ~con ~data; ++ write_access_log ~ty ~tid ~con:(Connection.get_domstr con) ~data; + Connection.incr_ops con; + ) + +@@ -496,7 +504,7 @@ let do_output store cons doms con = + info "[%s] <- %s \"%s\"" + (Connection.get_domstr con) + (Xenbus.Xb.Op.to_string ty) (sanitize_data data);*) +- write_answer_log ~ty ~tid ~con ~data; ++ write_answer_log ~ty ~tid ~con:(Connection.get_domstr con) ~data; + ); + try + ignore (Connection.do_output con) +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0010-oxenstored-allow-compilation-prior-to-OCaml-3.12.0.patch xen-4.6.5/debian/patches/xsa206-4.6-0010-oxenstored-allow-compilation-prior-to-OCaml-3.12.0.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0010-oxenstored-allow-compilation-prior-to-OCaml-3.12.0.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0010-oxenstored-allow-compilation-prior-to-OCaml-3.12.0.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,34 @@ +From 
47ea5e9be83c1e3fae2d2497a83adb66c6b4e3f8 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 16:28:45 +0000 +Subject: [PATCH 10/23] oxenstored: allow compilation prior to OCaml 3.12.0 + +Commit 363ae55c8 used an OCaml feature called record field punning. This broke +the build on compilers prior to OCaml 3.12.0. + +This patch makes no semantic change but now uses backwards-compatible syntax. + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reported-by: Boris Ostrovsky +Tested-by: Boris Ostrovsky +--- + tools/ocaml/xenstored/process.ml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index fb5fdaf..7b60376 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -484,7 +484,7 @@ let do_input store cons doms con = + if newpacket then ( + let packet = Connection.pop_in con in + let tid, rid, ty, data = Xenbus.Xb.Packet.unpack packet in +- let req = {Packet.tid; Packet.rid; Packet.ty; Packet.data} in ++ let req = {Packet.tid=tid; Packet.rid=rid; Packet.ty=ty; Packet.data=data} in + + (* As we don't log IO, do not call an unnecessary sanitize_data + info "[%s] -> [%d] %s \"%s\"" +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0011-oxenstored-comments-explaining-some-variables.patch xen-4.6.5/debian/patches/xsa206-4.6-0011-oxenstored-comments-explaining-some-variables.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0011-oxenstored-comments-explaining-some-variables.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0011-oxenstored-comments-explaining-some-variables.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,65 @@ +From b0711f09f6d0e3fb2b0e52e89222a16800333b21 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Tue, 14 Mar 2017 12:15:52 +0000 +Subject: [PATCH 11/23] oxenstored: comments explaining some variables + +It took a while of reading and reasoning to work out what these are +for, so here are comments to make life easier for everyone reading +this code in future. + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +Reviewed-by: Ian Jackson +Reviewed-by: Christian Lindig +--- + tools/ocaml/xenstored/store.ml | 1 + + tools/ocaml/xenstored/transaction.ml | 10 +++++++--- + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index 223ee21..9f619b8 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -211,6 +211,7 @@ let apply rnode path fct = + lookup rnode path fct + end + ++(* The Store.t type *) + type t = + { + mutable stat_transaction_coalesce: int; +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 6b37fc2..51d5d6a 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -69,11 +69,15 @@ let can_coalesce oldroot currentroot path = + else + false + +-type ty = No | Full of (int * Store.Node.t * Store.t) ++type ty = No | Full of ( ++ int * (* Transaction id *) ++ Store.Node.t * (* Original root *) ++ Store.t (* A pointer to the canonical store: its root changes on each transaction-commit *) ++) + + type t = { + ty: ty; +- store: Store.t; ++ store: Store.t; (* This is the store that we change in write operations. 
*)
+ quota: Quota.t;
+ mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list;
+ mutable operations: (Packet.request * Packet.response) list;
+@@ -155,7 +159,7 @@ let commit ~con t =
+ let has_commited =
+ match t.ty with
+ | No -> true
+- | Full (id, oldroot, cstore) ->
++ | Full (id, oldroot, cstore) -> (* "cstore" meaning current canonical store *)
+ let commit_partial oldroot cstore store =
+ (* get the lowest path of the query and verify that it hasn't
+ been modified by others transactions. *)
+-- 
+2.1.4
+
diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0012-oxenstored-handling-of-domain-conflict-credit.patch xen-4.6.5/debian/patches/xsa206-4.6-0012-oxenstored-handling-of-domain-conflict-credit.patch
--- xen-4.6.0/debian/patches/xsa206-4.6-0012-oxenstored-handling-of-domain-conflict-credit.patch 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/debian/patches/xsa206-4.6-0012-oxenstored-handling-of-domain-conflict-credit.patch 2017-05-09 12:51:19.000000000 +0000
@@ -0,0 +1,299 @@
+From 769f335e6bc11711f7fbe3a26f17f8e3a91bb007 Mon Sep 17 00:00:00 2001
+From: Thomas Sanders
+Date: Tue, 14 Mar 2017 12:15:52 +0000
+Subject: [PATCH 12/23] oxenstored: handling of domain conflict-credit
+
+This commit gives each domain a conflict-credit variable, which will
+later be used for limiting how often a domain can cause other domains'
+transaction-commits to fail.
+
+This commit also provides functions and data for manipulating domains
+and their conflict-credit, and checking whether they have credit.
+
+Reported-by: Juergen Gross
+Signed-off-by: Thomas Sanders
+Reviewed-by: Jonathan Davies
+Reviewed-by: Christian Lindig
+
+---
+ tools/ocaml/xenstored/connection.ml | 5 ++
+ tools/ocaml/xenstored/define.ml | 3 +
+ tools/ocaml/xenstored/domain.ml | 11 +++-
+ tools/ocaml/xenstored/domains.ml | 103 +++++++++++++++++++++++++++++++++-
+ tools/ocaml/xenstored/oxenstored.conf | 32 +++++++++++
+ tools/ocaml/xenstored/transaction.ml | 2 +
+ tools/ocaml/xenstored/xenstored.ml | 2 +
+ 7 files changed, 154 insertions(+), 4 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
+index b18336f..8a8d152 100644
+--- a/tools/ocaml/xenstored/connection.ml
++++ b/tools/ocaml/xenstored/connection.ml
+@@ -279,3 +279,8 @@ let debug con =
+ let domid = get_domstr con in
+ let watches = List.map (fun (path, token) -> Printf.sprintf "watch %s: %s %s\n" domid path token) (list_watches con) in
+ String.concat "" watches
++
++let decr_conflict_credit doms con =
++ match con.dom with
++ | None -> () (* It's a socket connection.
We don't know which domain we're in, so treat it as if it's free to conflict *) ++ | Some dom -> Domains.decr_conflict_credit doms dom +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index d60861c..df1e91c 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -29,6 +29,9 @@ let maxwatch = ref (50) + let maxtransaction = ref (20) + let maxrequests = ref (-1) (* maximum requests per transaction *) + ++let conflict_burst_limit = ref 5.0 ++let conflict_rate_limit_is_aggregate = ref true ++ + let domid_self = 0x7FF0 + + exception Not_a_directory of string +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index ab34314..e677aa3 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -31,8 +31,12 @@ type t = + mutable io_credit: int; (* the rounds of ring process left to do, default is 0, + usually set to 1 when there is work detected, could + also set to n to give "lazy" clients extra credit *) ++ mutable conflict_credit: float; (* Must be positive to perform writes; a commit ++ that later causes conflict with another ++ domain's transaction costs credit. *) + } + ++let is_dom0 d = d.id = 0 + let get_path dom = "/local/domain/" ^ (sprintf "%u" dom.id) + let get_id domain = domain.id + let get_interface d = d.interface +@@ -48,6 +52,10 @@ let set_io_credit ?(n=1) domain = domain.io_credit <- max 0 n + let incr_io_credit domain = domain.io_credit <- domain.io_credit + 1 + let decr_io_credit domain = domain.io_credit <- max 0 (domain.io_credit - 1) + ++let is_paused_for_conflict dom = dom.conflict_credit <= 0.0 ++ ++let is_free_to_conflict = is_dom0 ++ + let string_of_port = function + | None -> "None" + | Some x -> string_of_int (Xeneventchn.to_int x) +@@ -84,6 +92,5 @@ let make id mfn remote_port interface eventchn = { + port = None; + bad_client = false; + io_credit = 0; ++ conflict_credit = !Define.conflict_burst_limit; + } +- +-let is_dom0 d = d.id = 0 +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 92e438f..041d222 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -15,20 +15,58 @@ + *) + + let debug fmt = Logging.debug "domains" fmt ++let error fmt = Logging.error "domains" fmt ++let warn fmt = Logging.warn "domains" fmt + + type domains = { + eventchn: Event.t; + table: (Xenctrl.domid, Domain.t) Hashtbl.t; ++ ++ (* N.B. the Queue module is not thread-safe but oxenstored is single-threaded. *) ++ (* Domains queue up to regain conflict-credit; we have a queue for ++ domains that are carrying some penalty and so are below the ++ maximum credit, and another queue for domains that have run out of ++ credit and so have had their access paused. *) ++ doms_conflict_paused: (Domain.t option ref) Queue.t; ++ doms_with_conflict_penalty: (Domain.t option ref) Queue.t; ++ ++ (* A callback function to be called when we go from zero to one paused domain. ++ This will be to reset the countdown until the next unit of credit is issued. *) ++ on_first_conflict_pause: unit -> unit; ++ ++ (* If config is set to use individual instead of aggregate conflict-rate-limiting, ++ we use this instead of the queues. 
*) ++ mutable n_paused: int; + } + +-let init eventchn = +- { eventchn = eventchn; table = Hashtbl.create 10 } ++let init eventchn = { ++ eventchn = eventchn; ++ table = Hashtbl.create 10; ++ doms_conflict_paused = Queue.create (); ++ doms_with_conflict_penalty = Queue.create (); ++ on_first_conflict_pause = (fun () -> ()); (* Dummy value for now, pending subsequent commit. *) ++ n_paused = 0; ++} + let del doms id = Hashtbl.remove doms.table id + let exist doms id = Hashtbl.mem doms.table id + let find doms id = Hashtbl.find doms.table id + let number doms = Hashtbl.length doms.table + let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table + ++(* Functions to handle queues of domains given that the domain might be deleted while in a queue. *) ++let push dom queue = ++ Queue.push (ref (Some dom)) queue ++ ++let rec pop queue = ++ match !(Queue.pop queue) with ++ | None -> pop queue ++ | Some x -> x ++ ++let remove_from_queue dom queue = ++ Queue.iter (fun d -> match !d with ++ | None -> () ++ | Some x -> if x=dom then d := None) queue ++ + let cleanup xc doms = + let notify = ref false in + let dead_dom = ref [] in +@@ -52,6 +90,11 @@ let cleanup xc doms = + let dom = Hashtbl.find doms.table id in + Domain.close dom; + Hashtbl.remove doms.table id; ++ if dom.Domain.conflict_credit <= !Define.conflict_burst_limit ++ then ( ++ remove_from_queue dom doms.doms_with_conflict_penalty; ++ if (dom.Domain.conflict_credit <= 0.) then remove_from_queue dom doms.doms_conflict_paused ++ ) + ) !dead_dom; + !notify, !dead_dom + +@@ -84,3 +127,59 @@ let create0 fake doms = + Domain.bind_interdomain dom; + Domain.notify dom; + dom ++ ++let decr_conflict_credit doms dom = ++ let before = dom.Domain.conflict_credit in ++ let after = max (-1.0) (before -. 1.0) in ++ dom.Domain.conflict_credit <- after; ++ if !Define.conflict_rate_limit_is_aggregate then ( ++ if before >= !Define.conflict_burst_limit ++ && after < !Define.conflict_burst_limit ++ && after > 0.0 ++ then ( ++ push dom doms.doms_with_conflict_penalty ++ ) else if before > 0.0 && after <= 0.0 ++ then ( ++ let first_pause = Queue.is_empty doms.doms_conflict_paused in ++ push dom doms.doms_conflict_paused; ++ if first_pause then doms.on_first_conflict_pause () ++ ) else ( ++ (* The queues are correct already: no further action needed. *) ++ ) ++ ) else if before > 0.0 && after <= 0.0 then ( ++ doms.n_paused <- doms.n_paused + 1; ++ if doms.n_paused = 1 then doms.on_first_conflict_pause () ++ ) ++ ++(* Give one point of credit to one domain, and update the queues appropriately. *) ++let incr_conflict_credit_from_queue doms = ++ let process_queue q requeue_test = ++ let d = pop q in ++ d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit; ++ if requeue_test d.Domain.conflict_credit then ( ++ push d q (* Make it queue up again for its next point of credit. *) ++ ) ++ in ++ let paused_queue_test cred = cred <= 0.0 in ++ let penalty_queue_test cred = cred < !Define.conflict_burst_limit in ++ try process_queue doms.doms_conflict_paused paused_queue_test ++ with Queue.Empty -> ( ++ try process_queue doms.doms_with_conflict_penalty penalty_queue_test ++ with Queue.Empty -> () (* Both queues are empty: nothing to do here. *) ++ ) ++ ++let incr_conflict_credit doms = ++ if !Define.conflict_rate_limit_is_aggregate ++ then incr_conflict_credit_from_queue doms ++ else ( ++ (* Give a point of credit to every domain, subject only to the cap. 
*) ++ let inc dom = ++ let before = dom.Domain.conflict_credit in ++ let after = min (before +. 1.0) !Define.conflict_burst_limit in ++ dom.Domain.conflict_credit <- after; ++ if before <= 0.0 && after > 0.0 ++ then doms.n_paused <- doms.n_paused - 1 ++ in ++ (* Scope for optimisation (probably tiny): avoid iteration if all domains are at max credit *) ++ iter doms inc ++ ) +diff --git a/tools/ocaml/xenstored/oxenstored.conf b/tools/ocaml/xenstored/oxenstored.conf +index ac60f49..a100936 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf ++++ b/tools/ocaml/xenstored/oxenstored.conf +@@ -9,6 +9,38 @@ test-eagain = false + # Activate transaction merge support + merge-activate = true + ++# Limits applied to domains whose writes cause other domains' transaction ++# commits to fail. Must include decimal point. ++ ++# The burst limit is the number of conflicts a domain can cause to ++# fail in a short period; this value is used for both the initial and ++# the maximum value of each domain's conflict-credit, which falls by ++# one point for each conflict caused, and when it reaches zero the ++# domain's requests are ignored. ++conflict-burst-limit = 5.0 ++ ++# The conflict-credit is replenished over time: ++# one point is issued after each conflict-max-history-seconds, so this ++# is the minimum pause-time during which a domain will be ignored. ++# conflict-max-history-seconds = 0.05 ++ ++# If the conflict-rate-limit-is-aggregate flag is true then after each ++# tick one point of conflict-credit is given to just one domain: the ++# one at the front of the queue. If false, then after each tick each ++# domain gets a point of conflict-credit. ++# ++# In environments where it is known that every transaction will ++# involve a set of nodes that is writable by at most one other domain, ++# then it is safe to set this aggregate-limit flag to false for better ++# performance. (This can be determined by considering the layout of ++# the xenstore tree and permissions, together with the content of the ++# transactions that require protection.) ++# ++# A transaction which involves a set of nodes which can be modified by ++# multiple other domains can suffer conflicts caused by any of those ++# domains, so the flag must be set to true. ++conflict-rate-limit-is-aggregate = true ++ + # Activate node permission system + perms-activate = true + +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 51d5d6a..6f758ff 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -14,6 +14,8 @@ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) ++let error fmt = Logging.error "transaction" fmt ++ + open Stdext + + let none = 0 +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 7d3df43..941d800 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -89,6 +89,8 @@ let parse_config filename = + let pidfile = ref default_pidfile in + let options = [ + ("merge-activate", Config.Set_bool Transaction.do_coalesce); ++ ("conflict-burst-limit", Config.Set_float Define.conflict_burst_limit); ++ ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); + ("perms-activate", Config.Set_bool Perms.activate); + ("quota-activate", Config.Set_bool Quota.activate); + ("quota-maxwatch", Config.Set_int Define.maxwatch); +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0013-oxenstored-ignore-domains-with-no-conflict-credit.patch xen-4.6.5/debian/patches/xsa206-4.6-0013-oxenstored-ignore-domains-with-no-conflict-credit.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0013-oxenstored-ignore-domains-with-no-conflict-credit.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0013-oxenstored-ignore-domains-with-no-conflict-credit.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,219 @@ +From b2db82fb15c736602fdb3fa06ccd3011880925dc Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Tue, 14 Mar 2017 12:15:52 +0000 +Subject: [PATCH 13/23] oxenstored: ignore domains with no conflict-credit + +When processing connections, skip those from domains with no remaining +conflict-credit. + +Also, issue a point of conflict-credit at regular intervals, the +period being set by the configuration option "conflict-max-history- +seconds". When issuing conflict-credit, we give a point either to +every domain at once (one each) or only to the single domain at the +front of the queue, depending on the configuration option +"conflict-rate-limit-is-aggregate". 
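+
+For a sense of the timescales, the shipped defaults give the following
+back-of-envelope recovery times (a sketch assuming the domain is alone in the
+paused queue, or that per-domain rather than aggregate replenishment is
+configured):
+
+    (* Credit is capped at conflict-burst-limit, and one point is issued
+       per tick of conflict-max-history-seconds; a domain driven to zero
+       is paused for at least one tick and needs five ticks to regain a
+       full burst allowance. *)
+    let conflict_burst_limit = 5.0
+    let conflict_max_history_seconds = 0.05
+
+    let seconds_until_full_credit credit =
+        let points_needed = ceil (conflict_burst_limit -. credit) in
+        max 0.0 points_needed *. conflict_max_history_seconds
+
+    let () =
+        Printf.printf "full recovery after %.2fs\n"
+            (seconds_until_full_credit 0.0) (* prints 0.25 *)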
+ +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +Reviewed-by: Christian Lindig +--- + tools/ocaml/xenstored/connections.ml | 14 ++++---- + tools/ocaml/xenstored/define.ml | 1 + + tools/ocaml/xenstored/domains.ml | 4 +-- + tools/ocaml/xenstored/oxenstored.conf | 2 +- + tools/ocaml/xenstored/xenstored.ml | 65 ++++++++++++++++++++++++++--------- + 5 files changed, 60 insertions(+), 26 deletions(-) + +diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml +index f9bc225..ae76928 100644 +--- a/tools/ocaml/xenstored/connections.ml ++++ b/tools/ocaml/xenstored/connections.ml +@@ -44,12 +44,14 @@ let add_domain cons dom = + | Some p -> Hashtbl.add cons.ports p con; + | None -> () + +-let select cons = +- Hashtbl.fold +- (fun _ con (ins, outs) -> +- let fd = Connection.get_fd con in +- (fd :: ins, if Connection.has_output con then fd :: outs else outs)) +- cons.anonymous ([], []) ++let select ?(only_if = (fun _ -> true)) cons = ++ Hashtbl.fold (fun _ con (ins, outs) -> ++ if (only_if con) then ( ++ let fd = Connection.get_fd con in ++ (fd :: ins, if Connection.has_output con then fd :: outs else outs) ++ ) else (ins, outs) ++ ) ++ cons.anonymous ([], []) + + let find cons = + Hashtbl.find cons.anonymous +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index df1e91c..016ef18 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -30,6 +30,7 @@ let maxtransaction = ref (20) + let maxrequests = ref (-1) (* maximum requests per transaction *) + + let conflict_burst_limit = ref 5.0 ++let conflict_max_history_seconds = ref 0.05 + let conflict_rate_limit_is_aggregate = ref true + + let domid_self = 0x7FF0 +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 041d222..63c6ad5 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -39,12 +39,12 @@ type domains = { + mutable n_paused: int; + } + +-let init eventchn = { ++let init eventchn on_first_conflict_pause = { + eventchn = eventchn; + table = Hashtbl.create 10; + doms_conflict_paused = Queue.create (); + doms_with_conflict_penalty = Queue.create (); +- on_first_conflict_pause = (fun () -> ()); (* Dummy value for now, pending subsequent commit. *) ++ on_first_conflict_pause = on_first_conflict_pause; + n_paused = 0; + } + let del doms id = Hashtbl.remove doms.table id +diff --git a/tools/ocaml/xenstored/oxenstored.conf b/tools/ocaml/xenstored/oxenstored.conf +index a100936..dd9649b 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf ++++ b/tools/ocaml/xenstored/oxenstored.conf +@@ -22,7 +22,7 @@ conflict-burst-limit = 5.0 + # The conflict-credit is replenished over time: + # one point is issued after each conflict-max-history-seconds, so this + # is the minimum pause-time during which a domain will be ignored. 
+-# conflict-max-history-seconds = 0.05 ++conflict-max-history-seconds = 0.05 + + # If the conflict-rate-limit-is-aggregate flag is true then after each + # tick one point of conflict-credit is given to just one domain: the +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 941d800..b8e6e84 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -53,14 +53,16 @@ let process_connection_fds store cons domains rset wset = + + let process_domains store cons domains = + let do_io_domain domain = +- if not (Domain.is_bad_domain domain) then +- let io_credit = Domain.get_io_credit domain in +- if io_credit > 0 then ( +- let con = Connections.find_domain cons (Domain.get_id domain) in +- Process.do_input store cons domains con; +- Process.do_output store cons domains con; +- Domain.decr_io_credit domain; +- ) in ++ if Domain.is_bad_domain domain ++ || Domain.get_io_credit domain <= 0 ++ || Domain.is_paused_for_conflict domain ++ then () (* nothing to do *) ++ else ( ++ let con = Connections.find_domain cons (Domain.get_id domain) in ++ Process.do_input store cons domains con; ++ Process.do_output store cons domains con; ++ Domain.decr_io_credit domain ++ ) in + Domains.iter domains do_io_domain + + let sigusr1_handler store = +@@ -90,6 +92,7 @@ let parse_config filename = + let options = [ + ("merge-activate", Config.Set_bool Transaction.do_coalesce); + ("conflict-burst-limit", Config.Set_float Define.conflict_burst_limit); ++ ("conflict-max-history-seconds", Config.Set_float Define.conflict_max_history_seconds); + ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); + ("perms-activate", Config.Set_bool Perms.activate); + ("quota-activate", Config.Set_bool Quota.activate); +@@ -262,7 +265,22 @@ let _ = + + let store = Store.create () in + let eventchn = Event.init () in +- let domains = Domains.init eventchn in ++ let next_frequent_ops = ref 0. in ++ let advance_next_frequent_ops () = ++ next_frequent_ops := (Unix.gettimeofday () +. !Define.conflict_max_history_seconds) ++ in ++ let delay_next_frequent_ops_by duration = ++ next_frequent_ops := !next_frequent_ops +. duration ++ in ++ let domains = Domains.init eventchn advance_next_frequent_ops in ++ ++ (* For things that need to be done periodically but more often ++ * than the periodic_ops function *) ++ let frequent_ops () = ++ if Unix.gettimeofday () > !next_frequent_ops then ( ++ Domains.incr_conflict_credit domains; ++ advance_next_frequent_ops () ++ ) in + let cons = Connections.create () in + + let quit = ref false in +@@ -395,23 +413,34 @@ let _ = + gc.Gc.heap_words gc.Gc.heap_chunks + gc.Gc.live_words gc.Gc.live_blocks + gc.Gc.free_words gc.Gc.free_blocks +- ) +- in ++ ); ++ let elapsed = Unix.gettimeofday () -. now in ++ delay_next_frequent_ops_by elapsed ++ in + +- let period_ops_interval = 15. in +- let period_start = ref 0. in ++ let period_ops_interval = 15. in ++ let period_start = ref 0. in + + let main_loop () = +- ++ let is_peaceful c = ++ match Connection.get_domain c with ++ | None -> true (* Treat socket-connections as exempt, and free to conflict. 
*) ++ | Some dom -> not (Domain.is_paused_for_conflict dom) ++ in ++ frequent_ops (); + let mw = Connections.has_more_work cons in ++ let peaceful_mw = List.filter is_peaceful mw in + List.iter + (fun c -> + match Connection.get_domain c with + | None -> () | Some d -> Domain.incr_io_credit d) +- mw; ++ peaceful_mw; ++ let start_time = Unix.gettimeofday () in + let timeout = +- if List.length mw > 0 then 0. else period_ops_interval in +- let inset, outset = Connections.select cons in ++ let until_next_activity = min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in ++ if peaceful_mw <> [] then 0. else until_next_activity ++ in ++ let inset, outset = Connections.select ~only_if:is_peaceful cons in + let rset, wset, _ = + try + Select.select (spec_fds @ inset) outset [] timeout +@@ -421,6 +450,7 @@ let _ = + List.partition (fun fd -> List.mem fd spec_fds) rset in + if List.length sfds > 0 then + process_special_fds sfds; ++ + if List.length cfds > 0 || List.length wset > 0 then + process_connection_fds store cons domains cfds wset; + if timeout <> 0. then ( +@@ -428,6 +458,7 @@ let _ = + if now > !period_start +. period_ops_interval then + (period_start := now; periodic_ops now) + ); ++ + process_domains store cons domains + in + +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0014-oxenstored-add-transaction-info-relevant-to-history-.patch xen-4.6.5/debian/patches/xsa206-4.6-0014-oxenstored-add-transaction-info-relevant-to-history-.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0014-oxenstored-add-transaction-info-relevant-to-history-.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0014-oxenstored-add-transaction-info-relevant-to-history-.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,88 @@ +From b1ec169b35db0f70cba494c33a515876223ff7cc Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Tue, 14 Mar 2017 12:17:38 +0000 +Subject: [PATCH 14/23] oxenstored: add transaction info relevant to + history-tracking + +Specifically: + * retain the original store (not just the root) in full transactions + * store commit count at the time of the start of the transaction + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Thomas Sanders +Reviewed-by: Ian Jackson +Reviewed-by: Christian Lindig +--- + tools/ocaml/xenstored/process.ml | 2 +- + tools/ocaml/xenstored/transaction.ml | 12 ++++++++---- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 7b60376..5f92044 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -301,7 +301,7 @@ let transaction_replay c t doms cons = + | Transaction.No -> + error "attempted to replay a non-full transaction"; + false +- | Transaction.Full(id, oldroot, cstore) -> ++ | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in + let new_t = Transaction.make tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 6f758ff..b1791b3 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -73,12 +73,13 @@ let can_coalesce oldroot currentroot path = + + type ty = No | Full of ( + int * (* Transaction id *) +- Store.Node.t * (* Original root *) ++ Store.t * (* Original store *) + Store.t (* A pointer to the canonical store: its root changes on each transaction-commit 
*) + ) + + type t = { + ty: ty; ++ start_count: int64; + store: Store.t; (* This is the store that we change in write operations. *) + quota: Quota.t; + mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; +@@ -87,10 +88,13 @@ type t = { + mutable write_lowpath: Store.Path.t option; + } + ++let counter = ref 0L ++ + let make id store = +- let ty = if id = none then No else Full(id, Store.get_root store, store) in ++ let ty = if id = none then No else Full(id, Store.copy store, store) in + { + ty = ty; ++ start_count = !counter; + store = if id = none then store else Store.copy store; + quota = Quota.copy store.Store.quota; + paths = []; +@@ -161,7 +165,7 @@ let commit ~con t = + let has_commited = + match t.ty with + | No -> true +- | Full (id, oldroot, cstore) -> (* "cstore" meaning current canonical store *) ++ | Full (id, oldstore, cstore) -> (* "cstore" meaning current canonical store *) + let commit_partial oldroot cstore store = + (* get the lowest path of the query and verify that it hasn't + been modified by others transactions. *) +@@ -204,7 +208,7 @@ let commit ~con t = + if !test_eagain && Random.int 3 = 0 then + false + else +- try_commit oldroot cstore t.store ++ try_commit (Store.get_root oldstore) cstore t.store + in + if has_commited && has_write_ops then + Disk.write t.store; +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0015-oxenstored-support-commit-history-tracking.patch xen-4.6.5/debian/patches/xsa206-4.6-0015-oxenstored-support-commit-history-tracking.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0015-oxenstored-support-commit-history-tracking.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0015-oxenstored-support-commit-history-tracking.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,153 @@ +From ae5f87f3ac593abfb08f12673a06027a34b5450f Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Tue, 14 Mar 2017 13:20:07 +0000 +Subject: [PATCH 15/23] oxenstored: support commit history tracking + +Add ability to track xenstore tree operations -- either non-transactional +operations or committed transactions. + +For now, the call to actually retain commits is commented out because history +can grow without bound. + +For now, we call record_commit for all non-transactional operations. A +subsequent patch will make it retain only the ones with side-effects. + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Signed-off-by: Thomas Sanders +Reviewed-by: Christian Lindig + +--- + tools/ocaml/xenstored/Makefile | 1 + + tools/ocaml/xenstored/history.ml | 43 ++++++++++++++++++++++++++++++++++++++ + tools/ocaml/xenstored/process.ml | 24 +++++++++++++++++++-- + tools/ocaml/xenstored/xenstored.ml | 1 + + 4 files changed, 67 insertions(+), 2 deletions(-) + create mode 100644 tools/ocaml/xenstored/history.ml + +diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile +index dce9e70..ac44fc1 100644 +--- a/tools/ocaml/xenstored/Makefile ++++ b/tools/ocaml/xenstored/Makefile +@@ -50,6 +50,7 @@ OBJS = define \ + domains \ + connection \ + connections \ ++ history \ + parse_arg \ + process \ + xenstored +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +new file mode 100644 +index 0000000..e4b4d70 +--- /dev/null ++++ b/tools/ocaml/xenstored/history.ml +@@ -0,0 +1,43 @@ ++(* ++ * Copyright (c) 2017 Citrix Systems Ltd. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU Lesser General Public License as published ++ * by the Free Software Foundation; version 2.1 only. with the special ++ * exception on linking described in file LICENSE. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ *) ++ ++type history_record = { ++ con: Connection.t; (* connection that made a change *) ++ tid: int; (* transaction id of the change (may be Transaction.none) *) ++ before: Store.t; (* the store before the change *) ++ after: Store.t; (* the store after the change *) ++ finish_count: int64; (* the commit-count at which the transaction finished *) ++} ++ ++let history : history_record list ref = ref [] ++ ++(* Called from periodic_ops to ensure we don't discard symbols that are still needed. *) ++(* There is scope for optimisation here, since in consecutive commits one commit's `after` ++ * is the same thing as the next commit's `before`, but not all commits in history are ++ * consecutive. *) ++let mark_symbols () = ++ (* There are gaps where dom0's commits are missing. Otherwise we could assume that ++ * each element's `before` is the same thing as the next element's `after` ++ * since the next element is the previous commit *) ++ List.iter (fun hist_rec -> ++ Store.mark_symbols hist_rec.before; ++ Store.mark_symbols hist_rec.after; ++ ) ++ !history ++ ++let push (x: history_record) = ++ let dom = x.con.Connection.dom in ++ match dom with ++ | None -> () (* treat socket connections as always free to conflict *) ++ | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 5f92044..964c044 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -293,6 +293,16 @@ let write_response_log ~ty ~tid ~con ~response = + | Packet.Reply x -> write_answer_log ~ty ~tid ~con ~data:x + | Packet.Error e -> write_answer_log ~ty:(Xenbus.Xb.Op.Error) ~tid ~con ~data:e + ++let record_commit ~con ~tid ~before ~after = ++ let inc r = r := Int64.add 1L !r in ++ let finish_count = inc Transaction.counter; !Transaction.counter in ++ (* This call would leak memory if historic activity is retained forever ++ so can only be uncommented if history is guaranteed not to grow ++ unboundedly. ++ History.push {History.con=con; tid=tid; before=before; after=after; finish_count=finish_count} ++ *) ++ () ++ + (* Replay a stored transaction against a fresh store, check the responses are + all equivalent: if so, commit the transaction. Otherwise send the abort to + the client. 
*) +@@ -363,8 +373,14 @@ let do_transaction_end con t domains cons data = + Connection.end_transaction con (Transaction.get_id t) commit in + if not success then + raise Transaction_again; +- if commit then +- process_watch (List.rev (Transaction.get_paths t)) cons ++ if commit then begin ++ process_watch (List.rev (Transaction.get_paths t)) cons; ++ match t.Transaction.ty with ++ | Transaction.No -> ++ () (* no need to record anything *) ++ | Transaction.Full(id, oldstore, cstore) -> ++ record_commit ~con ~tid:id ~before:oldstore ~after:cstore ++ end + + let do_introduce con t domains cons data = + if not (Connection.is_dom0 con) +@@ -448,7 +464,11 @@ let process_packet ~store ~cons ~doms ~con ~req = + else + Connection.get_transaction con tid + in ++ ++ let before = Store.copy store in + let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in ++ let after = Store.copy store in ++ if tid = Transaction.none then record_commit ~con ~tid ~before ~after; + + let response = try + if tid <> Transaction.none then +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index b8e6e84..1d79b9e 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -386,6 +386,7 @@ let _ = + Symbol.mark_all_as_unused (); + Store.mark_symbols store; + Connections.iter cons Connection.mark_symbols; ++ History.mark_symbols (); + Symbol.garbage () + end; + +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0016-oxenstored-only-record-operations-with-side-effects-.patch xen-4.6.5/debian/patches/xsa206-4.6-0016-oxenstored-only-record-operations-with-side-effects-.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0016-oxenstored-only-record-operations-with-side-effects-.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0016-oxenstored-only-record-operations-with-side-effects-.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,85 @@ +From 594511920df9a1121b178d73f6fb8a48dfd35f9e Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 14:20:33 +0000 +Subject: [PATCH 16/23] oxenstored: only record operations with side-effects in + history + +There is no need to record "read" operations as they will never cause another +transaction to fail. + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Thomas Sanders + +--- + tools/ocaml/xenstored/process.ml | 47 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 43 insertions(+), 4 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 964c044..b435a4a 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -450,6 +450,37 @@ let function_of_type ty = + | _ -> function_of_type_simple_op ty + + (** ++ * Determines which individual (non-transactional) operations we want to retain. ++ * We only want to retain operations that have side-effects in the store since ++ * these can be the cause of transactions failing. 
++ *)
++let retain_op_in_history ty =
++	match ty with
++	| Xenbus.Xb.Op.Write
++	| Xenbus.Xb.Op.Mkdir
++	| Xenbus.Xb.Op.Rm
++	| Xenbus.Xb.Op.Setperms -> true
++	| Xenbus.Xb.Op.Debug
++	| Xenbus.Xb.Op.Directory
++	| Xenbus.Xb.Op.Read
++	| Xenbus.Xb.Op.Getperms
++	| Xenbus.Xb.Op.Watch
++	| Xenbus.Xb.Op.Unwatch
++	| Xenbus.Xb.Op.Transaction_start
++	| Xenbus.Xb.Op.Transaction_end
++	| Xenbus.Xb.Op.Introduce
++	| Xenbus.Xb.Op.Release
++	| Xenbus.Xb.Op.Getdomainpath
++	| Xenbus.Xb.Op.Watchevent
++	| Xenbus.Xb.Op.Error
++	| Xenbus.Xb.Op.Isintroduced
++	| Xenbus.Xb.Op.Resume
++	| Xenbus.Xb.Op.Set_target
++	| Xenbus.Xb.Op.Restrict
++	| Xenbus.Xb.Op.Reset_watches
++	| Xenbus.Xb.Op.Invalid -> false
++
++(**
+  * Nothrow guarantee.
+  *)
+ let process_packet ~store ~cons ~doms ~con ~req =
+@@ -465,10 +496,18 @@
+ 		Connection.get_transaction con tid
+ 	in
+ 
+-	let before = Store.copy store in
+-	let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in
+-	let after = Store.copy store in
+-	if tid = Transaction.none then record_commit ~con ~tid ~before ~after;
++	let execute () = input_handle_error ~cons ~doms ~fct ~con ~t ~req in
++
++	let response =
++		(* Note that transactions are recorded in history separately. *)
++		if tid = Transaction.none && retain_op_in_history ty then begin
++			let before = Store.copy store in
++			let response = execute () in
++			let after = Store.copy store in
++			record_commit ~con ~tid ~before ~after;
++			response
++		end else execute ()
++	in
+ 
+ 	let response = try
+ 		if tid <> Transaction.none then
+-- 
+2.1.4
+
diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0017-oxenstored-discard-old-commit-history-on-txn-end.patch xen-4.6.5/debian/patches/xsa206-4.6-0017-oxenstored-discard-old-commit-history-on-txn-end.patch
--- xen-4.6.0/debian/patches/xsa206-4.6-0017-oxenstored-discard-old-commit-history-on-txn-end.patch	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/debian/patches/xsa206-4.6-0017-oxenstored-discard-old-commit-history-on-txn-end.patch	2017-05-09 12:51:19.000000000 +0000
@@ -0,0 +1,141 @@
+From 2583968f96e8d431efc79c4da48379fd93363007 Mon Sep 17 00:00:00 2001
+From: Thomas Sanders
+Date: Thu, 23 Mar 2017 14:25:16 +0000
+Subject: [PATCH 17/23] oxenstored: discard old commit-history on txn end
+
+The history of commits is to be used for working out which historical
+commit(s) (including atomic writes) caused conflicts with a
+currently-failing commit of a transaction. Any commit that was made
+before the current transaction started cannot be relevant. Therefore
+we never need to keep history from before the start of the
+longest-running transaction that is open at any given time: whenever a
+transaction ends (with or without a commit), if it was the
+longest-running open transaction we can delete history up until the
+start of the next-longest-running open transaction.
+
+Some transactions might stay open for a very long time, so if any
+transaction exceeds conflict_max_history_seconds then we remove it
+from consideration in this context, and will not guarantee to keep
+remembering about historical commits made during such a transaction.
+
+We implement this by keeping a list of all open transactions that have
+not been open too long. When a transaction ends, we remove it from the
+list, along with any that have been open longer than the maximum; then
+we delete any history from before the start of the longest-running
+transaction remaining in the list.
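
As a standalone sketch of that trimming rule (the record and transaction types below are simplified stand-ins invented for illustration; the real implementation operates on the History and Transaction modules shown in the hunks that follow):

    type record = { finish_count : int64 }
    type txn = { start_count : int64 }

    (* Keep only the commits that finished after the start of the oldest
     * transaction still being tracked; with no open transactions there
     * is no history worth keeping at all. *)
    let trim (open_txns : txn list) (history : record list) : record list =
        match open_txns with
        | [] -> []
        | txns ->
            let oldest =
                List.fold_left (fun acc t -> min acc t.start_count)
                    Int64.max_int txns in
            List.filter (fun r -> r.finish_count > oldest) history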
+ +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +Reviewed-by: Christian Lindig +--- + tools/ocaml/xenstored/history.ml | 17 +++++++++++++++++ + tools/ocaml/xenstored/process.ml | 4 ++-- + tools/ocaml/xenstored/transaction.ml | 29 +++++++++++++++++++++++++---- + 3 files changed, 44 insertions(+), 6 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index e4b4d70..6f7a282 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -36,6 +36,23 @@ let mark_symbols () = + ) + !history + ++(* Keep only enough commit-history to protect the running transactions that we are still tracking *) ++(* There is scope for optimisation here, replacing List.filter with something more efficient, ++ * probably on a different list-like structure. *) ++let trim () = ++ history := match Transaction.oldest_short_running_transaction () with ++ | None -> [] (* We have no open transaction, so no history is needed *) ++ | Some (_, txn) -> ( ++ (* keep records with finish_count recent enough to be relevant *) ++ List.filter (fun r -> r.finish_count > txn.Transaction.start_count) !history ++ ) ++ ++let end_transaction txn con tid commit = ++ let success = Connection.end_transaction con tid commit in ++ Transaction.end_transaction txn; ++ trim (); ++ success ++ + let push (x: history_record) = + let dom = x.con.Connection.dom in + match dom with +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index b435a4a..6f4d118 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -313,7 +313,7 @@ let transaction_replay c t doms cons = + false + | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in +- let new_t = Transaction.make tid cstore in ++ let new_t = Transaction.make ~internal:true tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in + let perform_exn (request, response) = + write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; +@@ -370,7 +370,7 @@ let do_transaction_end con t domains cons data = + in + let success = + let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in +- Connection.end_transaction con (Transaction.get_id t) commit in ++ History.end_transaction t con (Transaction.get_id t) commit in + if not success then + raise Transaction_again; + if commit then begin +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index b1791b3..edd1178 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -87,12 +87,29 @@ type t = { + mutable read_lowpath: Store.Path.t option; + mutable write_lowpath: Store.Path.t option; + } ++let get_id t = match t.ty with No -> none | Full (id, _, _) -> id + + let counter = ref 0L + +-let make id store = ++(* Scope for optimisation: different data-structure and functions to search/filter it *) ++let short_running_txns = ref [] ++ ++let oldest_short_running_transaction () = ++ let rec last = function ++ | [] -> None ++ | [x] -> Some x ++ | x :: xs -> last xs ++ in last !short_running_txns ++ ++let end_transaction txn = ++ let cutoff = Unix.gettimeofday () -. 
!Define.conflict_max_history_seconds in ++ short_running_txns := List.filter ++ (function (start_time, tx) -> start_time >= cutoff && tx != txn) ++ !short_running_txns ++ ++let make ?(internal=false) id store = + let ty = if id = none then No else Full(id, Store.copy store, store) in +- { ++ let txn = { + ty = ty; + start_count = !counter; + store = if id = none then store else Store.copy store; +@@ -101,9 +118,13 @@ let make id store = + operations = []; + read_lowpath = None; + write_lowpath = None; +- } ++ } in ++ if id <> none && not internal then ( ++ let now = Unix.gettimeofday () in ++ short_running_txns := (now, txn) :: !short_running_txns ++ ); ++ txn + +-let get_id t = match t.ty with No -> none | Full (id, _, _) -> id + let get_store t = t.store + let get_paths t = t.paths + +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0018-oxenstored-track-commit-history.patch xen-4.6.5/debian/patches/xsa206-4.6-0018-oxenstored-track-commit-history.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0018-oxenstored-track-commit-history.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0018-oxenstored-track-commit-history.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,35 @@ +From 3ab13d5ebd991b4c9f9d1296c8f80a612f027298 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Mon, 27 Mar 2017 08:58:29 +0000 +Subject: [PATCH 18/23] oxenstored: track commit history + +Since the list of historic activity cannot grow without bound, it is safe to use +this to track commits. + +Reported-by: Juergen Gross +Signed-off-by: Jonathan Davies +Reviewed-by: Thomas Sanders +--- + tools/ocaml/xenstored/process.ml | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 6f4d118..1ed1a8f 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -296,12 +296,7 @@ let write_response_log ~ty ~tid ~con ~response = + let record_commit ~con ~tid ~before ~after = + let inc r = r := Int64.add 1L !r in + let finish_count = inc Transaction.counter; !Transaction.counter in +- (* This call would leak memory if historic activity is retained forever +- so can only be uncommented if history is guaranteed not to grow +- unboundedly. + History.push {History.con=con; tid=tid; before=before; after=after; finish_count=finish_count} +- *) +- () + + (* Replay a stored transaction against a fresh store, check the responses are + all equivalent: if so, commit the transaction. Otherwise send the abort to +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0019-oxenstored-blame-the-connection-that-caused-a-transa.patch xen-4.6.5/debian/patches/xsa206-4.6-0019-oxenstored-blame-the-connection-that-caused-a-transa.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0019-oxenstored-blame-the-connection-that-caused-a-transa.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0019-oxenstored-blame-the-connection-that-caused-a-transa.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,137 @@ +From 28a3047d339c0bf524173f38bcf7d25d346e8c62 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies +Date: Thu, 23 Mar 2017 14:28:16 +0000 +Subject: [PATCH 19/23] oxenstored: blame the connection that caused a + transaction conflict + +Blame each connection found to have made a commit that would cause this +transaction to fail. Each blamed connection is penalised by having its +conflict-credit decremented. 
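
The heart of the blame test can be sketched in isolation (replays_cleanly is a hypothetical stand-in for replaying the failed transaction's operations against a copy of a store and checking that every response matches the recorded one):

    (* A historical commit is guilty when the failing transaction would
     * have replayed cleanly against the store as it was before that
     * commit, but no longer replays cleanly against the store after it. *)
    let is_guilty replays_cleanly ops ~before ~after =
        replays_cleanly ops before && not (replays_cleanly ops after)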
+ +Note the change in semantics for the replay function: we no longer stop after +finding the first operation that can't be replayed. This allows us to identify +all operations that conflicted with this transaction, not just the one that +conflicted first. + +Signed-off-by: Jonathan Davies +Signed-off-by: Thomas Sanders +v1 Reviewed-by: Christian Lindig + +Changes since v1: + * use correct log levels for informational messages +Changes since v2: + * fix the blame algorithm and improve logging + (fix was reviewed by Jonathan Davies) + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +--- + tools/ocaml/xenstored/history.ml | 12 ++++++++++ + tools/ocaml/xenstored/process.ml | 50 ++++++++++++++++++++++++++++++++-------- + 2 files changed, 52 insertions(+), 10 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index 6f7a282..e941e2b 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -58,3 +58,15 @@ let push (x: history_record) = + match dom with + | None -> () (* treat socket connections as always free to conflict *) + | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history ++ ++(* Find the connections from records since commit-count [since] for which [f record] returns [true] *) ++let filter_connections ~since ~f = ++ (* The "mem" call is an optimisation, to avoid calling f if we have picked con already. *) ++ (* Using a hash table rather than a list is to optimise the "mem" call. *) ++ List.fold_left (fun acc hist_rec -> ++ if hist_rec.finish_count > since ++ && not (Hashtbl.mem acc hist_rec.con) ++ && f hist_rec ++ then Hashtbl.replace acc hist_rec.con (); ++ acc ++ ) (Hashtbl.create 1023) !history +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 1ed1a8f..5e5a1ab 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -16,6 +16,7 @@ + + let error fmt = Logging.error "process" fmt + let info fmt = Logging.info "process" fmt ++let debug fmt = Logging.debug "process" fmt + + open Printf + open Stdext +@@ -25,6 +26,7 @@ exception Transaction_nested + exception Domain_not_match + exception Invalid_Cmd_Args + ++(* This controls the do_debug fn in this module, not the debug logging-function. 
*) + let allow_debug = ref false + + let c_int_of_string s = +@@ -308,23 +310,51 @@ let transaction_replay c t doms cons = + false + | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in +- let new_t = Transaction.make ~internal:true tid cstore in ++ let replay_t = Transaction.make ~internal:true tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in +- let perform_exn (request, response) = +- write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; ++ ++ let perform_exn ~wlog txn (request, response) = ++ if wlog then write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; + let fct = function_of_type_simple_op request.Packet.ty in +- let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in +- write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response'; +- if not(Packet.response_equal response response') then raise Transaction_again in ++ let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:txn ~req:request in ++ if wlog then write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response'; ++ if not(Packet.response_equal response response') then raise Transaction_again ++ in + finally + (fun () -> + try + Logging.start_transaction ~con ~tid; +- List.iter perform_exn (Transaction.get_operations t); +- Logging.end_transaction ~con ~tid; ++ List.iter (perform_exn ~wlog:true replay_t) (Transaction.get_operations t); (* May throw EAGAIN *) + +- Transaction.commit ~con new_t +- with e -> ++ Logging.end_transaction ~con ~tid; ++ Transaction.commit ~con replay_t ++ with ++ | Transaction_again -> ( ++ let victim_domstr = Connection.get_domstr c in ++ debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr; ++ let punish guilty_con = ++ debug "Blaming domain %s for conflict with domain %s txn %d" ++ (Connection.get_domstr guilty_con) victim_domstr id; ++ Connection.decr_conflict_credit doms guilty_con ++ in ++ let judge_and_sentence hist_rec = ( ++ let can_apply_on store = ( ++ let store = Store.copy store in ++ let trial_t = Transaction.make ~internal:true Transaction.none store in ++ try List.iter (perform_exn ~wlog:false trial_t) (Transaction.get_operations t); ++ true ++ with Transaction_again -> false ++ ) in ++ if can_apply_on hist_rec.History.before ++ && not (can_apply_on hist_rec.History.after) ++ then (punish hist_rec.History.con; true) ++ else false ++ ) in ++ let guilty_cons = History.filter_connections ~since:t.Transaction.start_count ~f:judge_and_sentence in ++ if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; ++ false ++ ) ++ | e -> + info "transaction_replay %d caught: %s" tid (Printexc.to_string e); + false + ) +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0020-oxenstored-allow-self-conflicts.patch xen-4.6.5/debian/patches/xsa206-4.6-0020-oxenstored-allow-self-conflicts.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0020-oxenstored-allow-self-conflicts.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0020-oxenstored-allow-self-conflicts.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,58 @@ +From 54b70b7e7b8c09a2cf9ac1f01d357cf1a5a9f34b Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Thu, 23 Mar 2017 19:06:54 +0000 +Subject: [PATCH 20/23] oxenstored: allow self-conflicts + +We already avoid inter-domain conflicts but now allow intra-domain +conflicts. 
Although there are no known practical examples of a domain +that might perform operations that conflict with its own transactions, +this is conceivable, so here we avoid changing those semantics +unnecessarily. + +When a transaction commit fails with a conflict and we look through +the history of commits to see which connection(s) to blame, ignore +historical commits that were made by the same connection as the +failing commit. + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +--- + tools/ocaml/xenstored/history.ml | 3 ++- + tools/ocaml/xenstored/process.ml | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index e941e2b..4079588 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -60,11 +60,12 @@ let push (x: history_record) = + | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history + + (* Find the connections from records since commit-count [since] for which [f record] returns [true] *) +-let filter_connections ~since ~f = ++let filter_connections ~ignore ~since ~f = + (* The "mem" call is an optimisation, to avoid calling f if we have picked con already. *) + (* Using a hash table rather than a list is to optimise the "mem" call. *) + List.fold_left (fun acc hist_rec -> + if hist_rec.finish_count > since ++ && not (hist_rec.con == ignore) + && not (Hashtbl.mem acc hist_rec.con) + && f hist_rec + then Hashtbl.replace acc hist_rec.con (); +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 5e5a1ab..b56e3fc 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -350,7 +350,7 @@ let transaction_replay c t doms cons = + then (punish hist_rec.History.con; true) + else false + ) in +- let guilty_cons = History.filter_connections ~since:t.Transaction.start_count ~f:judge_and_sentence in ++ let guilty_cons = History.filter_connections ~ignore:c ~since:t.Transaction.start_count ~f:judge_and_sentence in + if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; + false + ) +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0021-oxenstored-do-not-commit-read-only-transactions.patch xen-4.6.5/debian/patches/xsa206-4.6-0021-oxenstored-do-not-commit-read-only-transactions.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0021-oxenstored-do-not-commit-read-only-transactions.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0021-oxenstored-do-not-commit-read-only-transactions.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,59 @@ +From 04c815f29e918ca54093da61d28eec18db582074 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Fri, 24 Mar 2017 16:16:10 +0000 +Subject: [PATCH 21/23] oxenstored: do not commit read-only transactions + +The packet telling us to end the transaction has always carried an +argument telling us whether to commit. + +If the transaction made no modifications to the tree, now we ignore +that argument and do not commit: it is just a waste of effort. + +This makes read-only transactions immune to conflicts, and means that +we do not need to store any of their details in the history that is +used for assigning blame for conflicts. + +We count a transaction as a read-only transaction only if it contains +no operations that modified the tree. 
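
A minimal sketch of that rule, assuming (as in the code below) that a transaction accumulates a list of the paths touched by its modifying operations:

    let is_read_only paths = (paths = [])

    (* Honour the client's commit flag only when something was modified. *)
    let effective_commit ~client_commit ~paths = client_commit && paths <> []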
+ +This means that (for example) a transaction that creates a new node +then deletes it would NOT count as read-only, even though it makes no +change overall. A more sophisticated algorithm could judge the +transaction based on comparison of its initial and final states, but +this would add complexity and computational cost. + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +--- + tools/ocaml/xenstored/process.ml | 1 + + tools/ocaml/xenstored/transaction.ml | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index b56e3fc..adfc7a4 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -393,6 +393,7 @@ let do_transaction_end con t domains cons data = + | x :: _ -> raise (Invalid_argument x) + | _ -> raise Invalid_Cmd_Args + in ++ let commit = commit && not (Transaction.is_read_only t) in + let success = + let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in + History.end_transaction t con (Transaction.get_id t) commit in +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index edd1178..8f95301 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -128,6 +128,7 @@ let make ?(internal=false) id store = + let get_store t = t.store + let get_paths t = t.paths + ++let is_read_only t = t.paths = [] + let add_wop t ty path = t.paths <- (ty, path) :: t.paths + let add_operation ~perm t request response = + if !Define.maxrequests >= 0 +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0022-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch xen-4.6.5/debian/patches/xsa206-4.6-0022-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0022-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0022-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,141 @@ +From fbc4354a22c070e6d336b9bf4eae5dfb80657a9b Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Fri, 24 Mar 2017 19:55:03 +0000 +Subject: [PATCH 22/23] oxenstored: don't wake to issue no conflict-credit + +In the main loop, when choosing the timeout for the select function +call, we were setting it so as to wake up to issue conflict-credit to +any domains that could accept it. When xenstore is idle, this would +mean waking up every 50ms (by default) to do no work. With this +commit, we check whether any domain is below its cap, and if not then +we set the timeout for longer (the same timeout as before the +conflict-protection feature was added). + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +Reviewed-by: Jonathan Davies +--- + tools/ocaml/xenstored/domains.ml | 51 ++++++++++++++++++++++++++++++-------- + tools/ocaml/xenstored/xenstored.ml | 5 +++- + 2 files changed, 44 insertions(+), 12 deletions(-) + +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 63c6ad5..25fd592 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -35,8 +35,9 @@ type domains = { + on_first_conflict_pause: unit -> unit; + + (* If config is set to use individual instead of aggregate conflict-rate-limiting, +- we use this instead of the queues. *) +- mutable n_paused: int; ++ we use these counts instead of the queues. 
The second one includes the first. *)
++	mutable n_paused: int;	(* Number of domains with zero or negative credit *)
++	mutable n_penalised: int;	(* Number of domains with less than maximum credit *)
+ }
+ 
+ let init eventchn on_first_conflict_pause = {
+@@ -46,6 +47,7 @@ doms_with_conflict_penalty = Queue.create ();
+ 	on_first_conflict_pause = on_first_conflict_pause;
+ 	n_paused = 0;
++	n_penalised = 0;
+ }
+ let del doms id = Hashtbl.remove doms.table id
+ let exist doms id = Hashtbl.mem doms.table id
+@@ -53,6 +55,23 @@ let find doms id = Hashtbl.find doms.table id
+ let number doms = Hashtbl.length doms.table
+ let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table
+ 
++let rec is_empty_queue q =
++	Queue.is_empty q ||
++	if !(Queue.peek q) = None
++	then (
++		ignore (Queue.pop q);
++		is_empty_queue q
++	) else false
++
++let all_at_max_credit doms =
++	if !Define.conflict_rate_limit_is_aggregate
++	then
++		(* Check both because if burst limit is 1.0 then a domain can go straight
++		 * from max-credit to paused without getting into the penalty queue. *)
++		is_empty_queue doms.doms_with_conflict_penalty
++		&& is_empty_queue doms.doms_conflict_paused
++	else doms.n_penalised = 0
++
+ (* Functions to handle queues of domains given that the domain might be deleted while in a queue. *)
+ let push dom queue =
+ 	Queue.push (ref (Some dom)) queue
+@@ -132,13 +151,16 @@ let decr_conflict_credit doms dom =
+ 	let before = dom.Domain.conflict_credit in
+ 	let after = max (-1.0) (before -. 1.0) in
+ 	dom.Domain.conflict_credit <- after;
++	let newly_penalised =
++		before >= !Define.conflict_burst_limit
++		&& after < !Define.conflict_burst_limit in
++	let newly_paused = before > 0.0 && after <= 0.0 in
+ 	if !Define.conflict_rate_limit_is_aggregate then (
+-		if before >= !Define.conflict_burst_limit
+-		&& after < !Define.conflict_burst_limit
++		if newly_penalised
+ 		&& after > 0.0
+ 		then (
+ 			push dom doms.doms_with_conflict_penalty
+-		) else if before > 0.0 && after <= 0.0
++		) else if newly_paused
+ 		then (
+ 			let first_pause = Queue.is_empty doms.doms_conflict_paused in
+ 			push dom doms.doms_conflict_paused;
+@@ -146,9 +168,12 @@
+ 		) else (
+ 			(* The queues are correct already: no further action needed. *)
+ 		)
+-	) else if before > 0.0 && after <= 0.0 then (
+-		doms.n_paused <- doms.n_paused + 1;
+-		if doms.n_paused = 1 then doms.on_first_conflict_pause ()
++	) else (
++		if newly_penalised then doms.n_penalised <- doms.n_penalised + 1;
++		if newly_paused then (
++			doms.n_paused <- doms.n_paused + 1;
++			if doms.n_paused = 1 then doms.on_first_conflict_pause ()
++		)
+ 	)
+ 
+ (* Give one point of credit to one domain, and update the queues appropriately. *)
+@@ -177,9 +202,13 @@
+ 	let before = dom.Domain.conflict_credit in
+ 	let after = min (before +.
1.0) !Define.conflict_burst_limit in + dom.Domain.conflict_credit <- after; ++ + if before <= 0.0 && after > 0.0 +- then doms.n_paused <- doms.n_paused - 1 ++ then doms.n_paused <- doms.n_paused - 1; ++ ++ if before < !Define.conflict_burst_limit ++ && after >= !Define.conflict_burst_limit ++ then doms.n_penalised <- doms.n_penalised - 1 + in +- (* Scope for optimisation (probably tiny): avoid iteration if all domains are at max credit *) +- iter doms inc ++ if doms.n_penalised > 0 then iter doms inc + ) +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 1d79b9e..03e19bb 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -438,7 +438,10 @@ let _ = + peaceful_mw; + let start_time = Unix.gettimeofday () in + let timeout = +- let until_next_activity = min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in ++ let until_next_activity = ++ if Domains.all_at_max_credit domains ++ then period_ops_interval ++ else min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in + if peaceful_mw <> [] then 0. else until_next_activity + in + let inset, outset = Connections.select ~only_if:is_peaceful cons in +-- +2.1.4 + diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0023-oxenstored-transaction-conflicts-improve-logging.patch xen-4.6.5/debian/patches/xsa206-4.6-0023-oxenstored-transaction-conflicts-improve-logging.patch --- xen-4.6.0/debian/patches/xsa206-4.6-0023-oxenstored-transaction-conflicts-improve-logging.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa206-4.6-0023-oxenstored-transaction-conflicts-improve-logging.patch 2017-05-09 12:51:19.000000000 +0000 @@ -0,0 +1,153 @@ +From a64892b9765cd4a79f19320f61ad4b8afb5826b1 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders +Date: Mon, 27 Mar 2017 14:36:34 +0100 +Subject: [PATCH 23/23] oxenstored transaction conflicts: improve logging + +For information related to transaction conflicts, potentially frequent +logging at "info" priority has been changed to "debug" priority, and +once per two minutes there is an "info" priority summary. + +Additional detailed logging has been added at "debug" priority. + +Reported-by: Juergen Gross +Signed-off-by: Thomas Sanders +--- + tools/ocaml/xenstored/domain.ml | 8 ++++++++ + tools/ocaml/xenstored/domains.ml | 5 +++++ + tools/ocaml/xenstored/process.ml | 6 +++++- + tools/ocaml/xenstored/transaction.ml | 5 +++++ + tools/ocaml/xenstored/xenstored.ml | 6 ++++++ + 5 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index e677aa3..4515650 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -34,6 +34,7 @@ type t = + mutable conflict_credit: float; (* Must be positive to perform writes; a commit + that later causes conflict with another + domain's transaction costs credit. 
*) ++ mutable caused_conflicts: int64; + } + + let is_dom0 d = d.id = 0 +@@ -93,4 +94,11 @@ let make id mfn remote_port interface eventchn = { + bad_client = false; + io_credit = 0; + conflict_credit = !Define.conflict_burst_limit; ++ caused_conflicts = 0L; + } ++ ++let log_and_reset_conflict_stats logfn dom = ++ if dom.caused_conflicts > 0L then ( ++ logfn dom.id dom.caused_conflicts; ++ dom.caused_conflicts <- 0L ++ ) +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 25fd592..ca749fa 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -148,8 +148,10 @@ let create0 fake doms = + dom + + let decr_conflict_credit doms dom = ++ dom.Domain.caused_conflicts <- Int64.add 1L dom.Domain.caused_conflicts; + let before = dom.Domain.conflict_credit in + let after = max (-1.0) (before -. 1.0) in ++ debug "decr_conflict_credit dom%d %F -> %F" (Domain.get_id dom) before after; + dom.Domain.conflict_credit <- after; + let newly_penalised = + before >= !Define.conflict_burst_limit +@@ -180,7 +182,9 @@ let decr_conflict_credit doms dom = + let incr_conflict_credit_from_queue doms = + let process_queue q requeue_test = + let d = pop q in ++ let before = d.Domain.conflict_credit in (* just for debug-logging *) + d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit; ++ debug "incr_conflict_credit_from_queue: dom%d: %F -> %F" (Domain.get_id d) before d.Domain.conflict_credit; + if requeue_test d.Domain.conflict_credit then ( + push d q (* Make it queue up again for its next point of credit. *) + ) +@@ -202,6 +206,7 @@ let incr_conflict_credit doms = + let before = dom.Domain.conflict_credit in + let after = min (before +. 1.0) !Define.conflict_burst_limit in + dom.Domain.conflict_credit <- after; ++ debug "incr_conflict_credit dom%d: %F -> %F" (Domain.get_id dom) before after; + + if before <= 0.0 && after > 0.0 + then doms.n_paused <- doms.n_paused - 1; +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index adfc7a4..8a688c4 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -330,6 +330,7 @@ let transaction_replay c t doms cons = + Transaction.commit ~con replay_t + with + | Transaction_again -> ( ++ Transaction.failed_commits := Int64.add !Transaction.failed_commits 1L; + let victim_domstr = Connection.get_domstr c in + debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr; + let punish guilty_con = +@@ -351,7 +352,10 @@ let transaction_replay c t doms cons = + else false + ) in + let guilty_cons = History.filter_connections ~ignore:c ~since:t.Transaction.start_count ~f:judge_and_sentence in +- if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; ++ if Hashtbl.length guilty_cons = 0 then ( ++ debug "Found no culprit for conflict in %s: must be self or not in history." 
con;
++			Transaction.failed_commits_no_culprit := Int64.add !Transaction.failed_commits_no_culprit 1L
++		);
+ 		false
+ 	)
+ 	| e ->
+diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml
+index 8f95301..da4a3e3 100644
+--- a/tools/ocaml/xenstored/transaction.ml
++++ b/tools/ocaml/xenstored/transaction.ml
+@@ -90,6 +90,11 @@ type t = {
+ let get_id t = match t.ty with No -> none | Full (id, _, _) -> id
+ 
+ let counter = ref 0L
++let failed_commits = ref 0L
++let failed_commits_no_culprit = ref 0L
++let reset_conflict_stats () =
++	failed_commits := 0L;
++	failed_commits_no_culprit := 0L
+ 
+ (* Scope for optimisation: different data-structure and functions to search/filter it *)
+ let short_running_txns = ref []
+diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
+index 03e19bb..a481d80 100644
+--- a/tools/ocaml/xenstored/xenstored.ml
++++ b/tools/ocaml/xenstored/xenstored.ml
+@@ -377,6 +377,7 @@ let _ =
+ 	let last_scan_time = ref 0. in
+ 
+ 	let periodic_ops now =
++		debug "periodic_ops starting";
+ 		(* we garbage collect the string->int dictionary after a sizeable amount of operations,
+ 		 * there's no need to be really fast even if we got loose
+ 		 * objects since names are often reused.
+@@ -396,7 +397,11 @@
+ 
+ 		(* make sure we don't print general stats faster than 2 min *)
+ 		if now > (!last_stat_time +. 120.) then (
++			info "Transaction conflict statistics for last %F seconds:" (now -. !last_stat_time);
+ 			last_stat_time := now;
++			Domains.iter domains (Domain.log_and_reset_conflict_stats (info "Dom%d caused %Ld conflicts"));
++			info "%Ld failed transactions; of these no culprit was found for %Ld" !Transaction.failed_commits !Transaction.failed_commits_no_culprit;
++			Transaction.reset_conflict_stats ();
+ 
+ 			let gc = Gc.stat () in
+ 			let (lanon, lanon_ops, lanon_watchs,
+@@ -416,6 +421,7 @@
+ 				gc.Gc.free_words gc.Gc.free_blocks
+ 			);
+ 		let elapsed = Unix.gettimeofday () -. now in
++		debug "periodic_ops took %F seconds." elapsed;
+ 		delay_next_frequent_ops_by elapsed
+ 	in
+ 
+-- 
+2.1.4
+
diff -Nru xen-4.6.0/debian/patches/xsa206-4.6-0024-oxenstored-trim-history-in-the-frequent_ops-function.patch xen-4.6.5/debian/patches/xsa206-4.6-0024-oxenstored-trim-history-in-the-frequent_ops-function.patch
--- xen-4.6.0/debian/patches/xsa206-4.6-0024-oxenstored-trim-history-in-the-frequent_ops-function.patch	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/debian/patches/xsa206-4.6-0024-oxenstored-trim-history-in-the-frequent_ops-function.patch	2017-05-09 12:51:19.000000000 +0000
@@ -0,0 +1,79 @@
+From 26b15d4eb7ac71fcab28a7fca664afa0549c135c Mon Sep 17 00:00:00 2001
+From: Thomas Sanders
+Date: Tue, 28 Mar 2017 18:57:52 +0100
+Subject: [PATCH 24/23] oxenstored: trim history in the frequent_ops function
+
+We were trimming the history of commits only at the end of each
+transaction (regardless of how it ended).
+
+Therefore if non-transactional writes were being made but no
+transactions were being ended, the history would grow
+indefinitely. Now we trim the history at regular intervals.
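
The shape of the fix can be sketched as follows (trim_history and issue_conflict_credit are stand-ins for the real History.trim and Domains.incr_conflict_credit calls; the deadline handling mirrors next_frequent_ops in xenstored.ml):

    let next_deadline = ref 0.

    (* Run the cheap periodic work whenever the deadline passes, whether
     * or not any transaction happened to end in the meantime. *)
    let frequent_ops ~interval ~trim_history ~issue_conflict_credit () =
        if Unix.gettimeofday () > !next_deadline then begin
            trim_history ();
            issue_conflict_credit ();
            next_deadline := Unix.gettimeofday () +. interval
        end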
+
+Signed-off-by: Thomas Sanders
+---
+ tools/ocaml/xenstored/history.ml     | 6 +++---
+ tools/ocaml/xenstored/transaction.ml | 8 ++++++--
+ tools/ocaml/xenstored/xenstored.ml   | 1 +
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml
+index 4079588..f39565b 100644
+--- a/tools/ocaml/xenstored/history.ml
++++ b/tools/ocaml/xenstored/history.ml
+@@ -39,7 +39,8 @@ let mark_symbols () =
+ (* Keep only enough commit-history to protect the running transactions that we are still tracking *)
+ (* There is scope for optimisation here, replacing List.filter with something more efficient,
+  * probably on a different list-like structure. *)
+-let trim () =
++let trim ?txn () =
++	Transaction.trim_short_running_transactions txn;
+ 	history := match Transaction.oldest_short_running_transaction () with
+ 	| None -> [] (* We have no open transaction, so no history is needed *)
+ 	| Some (_, txn) -> (
+@@ -49,8 +50,7 @@
+ 
+ let end_transaction txn con tid commit =
+ 	let success = Connection.end_transaction con tid commit in
+-	Transaction.end_transaction txn;
+-	trim ();
++	trim ~txn ();
+ 	success
+ 
+ let push (x: history_record) =
+diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml
+index da4a3e3..23e7ccf 100644
+--- a/tools/ocaml/xenstored/transaction.ml
++++ b/tools/ocaml/xenstored/transaction.ml
+@@ -106,10 +106,14 @@ let oldest_short_running_transaction () =
+ 	| x :: xs -> last xs
+ 	in last !short_running_txns
+ 
+-let end_transaction txn =
++let trim_short_running_transactions txn =
+ 	let cutoff = Unix.gettimeofday () -. !Define.conflict_max_history_seconds in
++	let keep = match txn with
++		| None -> (function (start_time, _) -> start_time >= cutoff)
++		| Some t -> (function (start_time, tx) -> start_time >= cutoff && tx != t)
++	in
+ 	short_running_txns := List.filter
+-		(function (start_time, tx) -> start_time >= cutoff && tx != txn)
++		keep
+ 		!short_running_txns
+ 
+ let make ?(internal=false) id store =
+diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
+index 92ea99e..c45146d 100644
+--- a/tools/ocaml/xenstored/xenstored.ml
++++ b/tools/ocaml/xenstored/xenstored.ml
+@@ -280,6 +280,7 @@ let _ =
+ 	 * than the periodic_ops function *)
+ 	let frequent_ops () =
+ 		if Unix.gettimeofday () > !next_frequent_ops then (
++			History.trim ();
+ 			Domains.incr_conflict_credit domains;
+ 			advance_next_frequent_ops ()
+ 		) in
+-- 
+1.7.9.5
+
diff -Nru xen-4.6.0/debian/patches/xsa212.patch xen-4.6.5/debian/patches/xsa212.patch
--- xen-4.6.0/debian/patches/xsa212.patch	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/debian/patches/xsa212.patch	2017-05-09 13:27:28.000000000 +0000
@@ -0,0 +1,87 @@
+memory: properly check guest memory ranges in XENMEM_exchange handling
+
+The use of guest_handle_okay() (as introduced by the XSA-29 fix) is
+insufficient here; guest_handle_subrange_okay() needs to be used
+instead.
+
+Note that the uses are okay in
+- XENMEM_add_to_physmap_batch handling due to the size field being only
+  16 bits wide,
+- livepatch_list() due to the limit of 1024 enforced on the
+  number-of-entries input (leaving aside the fact that this can be
+  called by a privileged domain only anyway),
+- compat mode handling due to counts there being limited to 32 bits,
+- everywhere else due to guest arrays being accessed sequentially from
+  index zero.
+
+This is XSA-212.
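
The hardened check can be modelled in OCaml for illustration only (the actual fix below is to the hypervisor's C macros; access_ok stands in for the underlying address-range test):

    (* Refuse when count * size would overflow before performing the
     * range check, so a huge count cannot wrap the computed byte length. *)
    let array_access_ok ~access_ok ~addr ~count ~size =
        size > 0 && count < max_int / size && access_ok addr (count * size)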
+ +Reported-by: Jann Horn +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -436,8 +436,8 @@ static long memory_exchange(XEN_GUEST_HA + goto fail_early; + } + +- if ( !guest_handle_okay(exch.in.extent_start, exch.in.nr_extents) || +- !guest_handle_okay(exch.out.extent_start, exch.out.nr_extents) ) ++ if ( !guest_handle_subrange_okay(exch.in.extent_start, exch.nr_exchanged, ++ exch.in.nr_extents - 1) ) + { + rc = -EFAULT; + goto fail_early; +@@ -447,11 +447,27 @@ static long memory_exchange(XEN_GUEST_HA + { + in_chunk_order = exch.out.extent_order - exch.in.extent_order; + out_chunk_order = 0; ++ ++ if ( !guest_handle_subrange_okay(exch.out.extent_start, ++ exch.nr_exchanged >> in_chunk_order, ++ exch.out.nr_extents - 1) ) ++ { ++ rc = -EFAULT; ++ goto fail_early; ++ } + } + else + { + in_chunk_order = 0; + out_chunk_order = exch.in.extent_order - exch.out.extent_order; ++ ++ if ( !guest_handle_subrange_okay(exch.out.extent_start, ++ exch.nr_exchanged << out_chunk_order, ++ exch.out.nr_extents - 1) ) ++ { ++ rc = -EFAULT; ++ goto fail_early; ++ } + } + + d = rcu_lock_domain_by_any_id(exch.in.domid); +--- a/xen/include/asm-x86/x86_64/uaccess.h ++++ b/xen/include/asm-x86/x86_64/uaccess.h +@@ -29,8 +29,9 @@ extern void *xlat_malloc(unsigned long * + /* + * Valid if in +ve half of 48-bit address space, or above Xen-reserved area. + * This is also valid for range checks (addr, addr+size). As long as the +- * start address is outside the Xen-reserved area then we will access a +- * non-canonical address (and thus fault) before ever reaching VIRT_START. ++ * start address is outside the Xen-reserved area, sequential accesses ++ * (starting at addr) will hit a non-canonical address (and thus fault) ++ * before ever reaching VIRT_START. + */ + #define __addr_ok(addr) \ + (((unsigned long)(addr) < (1UL<<47)) || \ +@@ -40,7 +41,8 @@ extern void *xlat_malloc(unsigned long * + (__addr_ok(addr) || is_compat_arg_xlat_range(addr, size)) + + #define array_access_ok(addr, count, size) \ +- (access_ok(addr, (count)*(size))) ++ (likely(((count) ?: 0UL) < (~0UL / (size))) && \ ++ access_ok(addr, (count) * (size))) + + #define __compat_addr_ok(d, addr) \ + ((unsigned long)(addr) < HYPERVISOR_COMPAT_VIRT_START(d)) diff -Nru xen-4.6.0/debian/patches/xsa213-4.6.patch xen-4.6.5/debian/patches/xsa213-4.6.patch --- xen-4.6.0/debian/patches/xsa213-4.6.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa213-4.6.patch 2017-05-09 13:28:28.000000000 +0000 @@ -0,0 +1,173 @@ +From: Jan Beulich +Subject: multicall: deal with early exit conditions + +In particular changes to guest privilege level require the multicall +sequence to be aborted, as hypercalls are permitted from kernel mode +only. While likely not very useful in a multicall, also properly handle +the return value in the HYPERVISOR_iret case (which should be the guest +specified value). + +This is XSA-213. 
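
The revised control flow can be modelled as a three-way disposition (an illustrative OCaml model of the loop's semantics only, not the hypervisor code):

    type mc_disposition = Continue | Exit of int | Preempt

    (* Process calls until one demands an early stop: Exit carries the
     * guest-specified return value (the HYPERVISOR_iret case), while
     * Preempt causes the remaining calls to be re-issued later instead
     * of being run from the wrong privilege level. *)
    let rec run_batch do_call = function
        | [] -> Ok ()
        | call :: rest ->
            (match do_call call with
             | Continue -> run_batch do_call rest
             | Exit rc -> Error rc
             | Preempt -> Ok ())  (* model: the rest is re-queued *)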
+ +Reported-by: Jann Horn +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Julien Grall + +--- a/xen/arch/arm/traps.c ++++ b/xen/arch/arm/traps.c +@@ -1485,30 +1485,33 @@ static bool_t check_multicall_32bit_clea + return true; + } + +-void do_multicall_call(struct multicall_entry *multi) ++enum mc_disposition do_multicall_call(struct multicall_entry *multi) + { + arm_hypercall_fn_t call = NULL; + + if ( multi->op >= ARRAY_SIZE(arm_hypercall_table) ) + { + multi->result = -ENOSYS; +- return; ++ return mc_continue; + } + + call = arm_hypercall_table[multi->op].fn; + if ( call == NULL ) + { + multi->result = -ENOSYS; +- return; ++ return mc_continue; + } + + if ( is_32bit_domain(current->domain) && + !check_multicall_32bit_clean(multi) ) +- return; ++ return mc_continue; + + multi->result = call(multi->args[0], multi->args[1], + multi->args[2], multi->args[3], + multi->args[4]); ++ ++ return likely(!psr_mode_is_user(guest_cpu_user_regs())) ++ ? mc_continue : mc_preempt; + } + + /* +--- a/xen/common/multicall.c ++++ b/xen/common/multicall.c +@@ -40,6 +40,7 @@ do_multicall( + struct mc_state *mcs = ¤t->mc_state; + uint32_t i; + int rc = 0; ++ enum mc_disposition disp = mc_continue; + + if ( unlikely(__test_and_set_bit(_MCSF_in_multicall, &mcs->flags)) ) + { +@@ -50,7 +51,7 @@ do_multicall( + if ( unlikely(!guest_handle_okay(call_list, nr_calls)) ) + rc = -EFAULT; + +- for ( i = 0; !rc && i < nr_calls; i++ ) ++ for ( i = 0; !rc && disp == mc_continue && i < nr_calls; i++ ) + { + if ( i && hypercall_preempt_check() ) + goto preempted; +@@ -63,7 +64,7 @@ do_multicall( + + trace_multicall_call(&mcs->call); + +- do_multicall_call(&mcs->call); ++ disp = do_multicall_call(&mcs->call); + + #ifndef NDEBUG + { +@@ -77,7 +78,14 @@ do_multicall( + } + #endif + +- if ( unlikely(__copy_field_to_guest(call_list, &mcs->call, result)) ) ++ if ( unlikely(disp == mc_exit) ) ++ { ++ if ( __copy_field_to_guest(call_list, &mcs->call, result) ) ++ /* nothing, best effort only */; ++ rc = mcs->call.result; ++ } ++ else if ( unlikely(__copy_field_to_guest(call_list, &mcs->call, ++ result)) ) + rc = -EFAULT; + else if ( test_bit(_MCSF_call_preempted, &mcs->flags) ) + { +@@ -93,6 +101,9 @@ do_multicall( + guest_handle_add_offset(call_list, 1); + } + ++ if ( unlikely(disp == mc_preempt) && i < nr_calls ) ++ goto preempted; ++ + perfc_incr(calls_to_multicall); + perfc_add(calls_from_multicall, i); + mcs->flags = 0; +--- a/xen/include/asm-arm/multicall.h ++++ b/xen/include/asm-arm/multicall.h +@@ -1,7 +1,11 @@ + #ifndef __ASM_ARM_MULTICALL_H__ + #define __ASM_ARM_MULTICALL_H__ + +-extern void do_multicall_call(struct multicall_entry *call); ++extern enum mc_disposition { ++ mc_continue, ++ mc_exit, ++ mc_preempt, ++} do_multicall_call(struct multicall_entry *call); + + #endif /* __ASM_ARM_MULTICALL_H__ */ + /* +--- a/xen/include/asm-x86/multicall.h ++++ b/xen/include/asm-x86/multicall.h +@@ -7,8 +7,21 @@ + + #include + ++enum mc_disposition { ++ mc_continue, ++ mc_exit, ++ mc_preempt, ++}; ++ ++#define multicall_ret(call) \ ++ (unlikely((call)->op == __HYPERVISOR_iret) \ ++ ? mc_exit \ ++ : likely(guest_kernel_mode(current, \ ++ guest_cpu_user_regs())) \ ++ ? 
mc_continue : mc_preempt) ++ + #define do_multicall_call(_call) \ +- do { \ ++ ({ \ + __asm__ __volatile__ ( \ + " movq %c1(%0),%%rax; " \ + " leaq hypercall_table(%%rip),%%rdi; " \ +@@ -37,9 +50,11 @@ + /* all the caller-saves registers */ \ + : "rax", "rcx", "rdx", "rsi", "rdi", \ + "r8", "r9", "r10", "r11" ); \ +- } while ( 0 ) ++ multicall_ret(_call); \ ++ }) + + #define compat_multicall_call(_call) \ ++ ({ \ + __asm__ __volatile__ ( \ + " movl %c1(%0),%%eax; " \ + " leaq compat_hypercall_table(%%rip),%%rdi; "\ +@@ -67,6 +82,8 @@ + "i" (-ENOSYS) \ + /* all the caller-saves registers */ \ + : "rax", "rcx", "rdx", "rsi", "rdi", \ +- "r8", "r9", "r10", "r11" ) \ ++ "r8", "r9", "r10", "r11" ); \ ++ multicall_ret(_call); \ ++ }) + + #endif /* __ASM_X86_MULTICALL_H__ */ diff -Nru xen-4.6.0/debian/patches/xsa214.patch xen-4.6.5/debian/patches/xsa214.patch --- xen-4.6.0/debian/patches/xsa214.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa214.patch 2017-05-09 13:29:16.000000000 +0000 @@ -0,0 +1,41 @@ +From: Jan Beulich +Subject: x86: discard type information when stealing pages + +While a page having just a single general reference left necessarily +has a zero type reference count too, its type may still be valid (and +in validated state; at present this is only possible and relevant for +PGT_seg_desc_page, as page tables have their type forcibly zapped when +their type reference count drops to zero, and +PGT_{writable,shared}_page pages don't require any validation). In +such a case when the page is being re-used with the same type again, +validation is being skipped. As validation criteria differ between +32- and 64-bit guests, pages to be transferred between guests need to +have their validation indicator zapped (and with it we zap all other +type information at once). + +This is XSA-214. + +Reported-by: Jann Horn +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4466,6 +4466,17 @@ int steal_page( + y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask); + } while ( y != x ); + ++ /* ++ * With the sole reference dropped temporarily, no-one can update type ++ * information. Type count also needs to be zero in this case, but e.g. ++ * PGT_seg_desc_page may still have PGT_validated set, which we need to ++ * clear before transferring ownership (as validation criteria vary ++ * depending on domain type). ++ */ ++ BUG_ON(page->u.inuse.type_info & (PGT_count_mask | PGT_locked | ++ PGT_pinned)); ++ page->u.inuse.type_info = 0; ++ + /* Swizzle the owner then reinstate the PGC_allocated reference. */ + page_set_owner(page, NULL); + y = page->count_info; diff -Nru xen-4.6.0/debian/patches/xsa215.patch xen-4.6.5/debian/patches/xsa215.patch --- xen-4.6.0/debian/patches/xsa215.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/debian/patches/xsa215.patch 2017-05-09 13:29:42.000000000 +0000 @@ -0,0 +1,37 @@ +From: Jan Beulich +Subject: x86: correct create_bounce_frame + +We may push up to 96 bytes on the guest (kernel) stack, so we should +also cover as much in the early range check. Note that this is the +simplest possible patch, which has the theoretical potential of +breaking a guest: We only really push 96 bytes when invoking the +failsafe callback, ordinary exceptions only have 56 or 64 bytes pushed +(without / with error code respectively). There is, however, no PV OS +known to place a kernel stack there. + +This is XSA-215. 
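
The arithmetic behind the widened bound can be checked directly (a sketch only; the slot counts follow the frame layout named in the patched comment below):

    let slot = 8                  (* bytes per 64-bit push *)
    let ordinary = 7 * slot       (* RCX, R11, RIP, CS, RFLAGS, RSP, SS: 56 bytes *)
    let with_errcode = 8 * slot   (* plus an error code: 64 bytes *)
    let failsafe = 12 * slot      (* plus DS, ES, FS, GS: 96 bytes, hence 12*8 *)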
+ +Reported-by: Jann Horn +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -347,7 +347,7 @@ int80_slow_path: + jmp handle_exception_saved + + /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */ +-/* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */ ++/* { RCX, R11, [DS-GS,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */ + /* %rdx: trap_bounce, %rbx: struct vcpu */ + /* On return only %rbx and %rdx are guaranteed non-clobbered. */ + create_bounce_frame: +@@ -367,7 +367,7 @@ create_bounce_frame: + 2: andq $~0xf,%rsi # Stack frames are 16-byte aligned. + movq $HYPERVISOR_VIRT_START,%rax + cmpq %rax,%rsi +- movq $HYPERVISOR_VIRT_END+60,%rax ++ movq $HYPERVISOR_VIRT_END+12*8,%rax + sbb %ecx,%ecx # In +ve address space? Then okay. + cmpq %rax,%rsi + adc %ecx,%ecx # Above Xen private area? Then okay. diff -Nru xen-4.6.0/docs/man/xl.cfg.pod.5 xen-4.6.5/docs/man/xl.cfg.pod.5 --- xen-4.6.0/docs/man/xl.cfg.pod.5 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/docs/man/xl.cfg.pod.5 2017-03-07 16:19:05.000000000 +0000 @@ -741,14 +741,17 @@ =item B -(PV only) By default pciback only allows PV guests to write "known -safe" values into PCI config space. But many devices require writes -to other areas of config space in order to operate properly. This -tells the pciback driver to allow all writes to PCI config space of -this device by this domain. This option should be enabled with -caution: it gives the guest much more control over the device, which -may have security or stability implications. It is recommended to -enable this option only for trusted VMs under administrator control. +By default pciback only allows PV guests to write "known safe" values +into PCI config space, likewise QEMU (both qemu-xen and +qemu-traditional) imposes the same contraint on HVM guests. However +many devices require writes to other areas of config space in order to +operate properly. This option tells the backend (pciback or QEMU) to +allow all writes to PCI config space of this device by this domain. + +This option should be enabled with caution: it gives the guest much +more control over the device, which may have security or stability +implications. It is recommended to enable this option only for +trusted VMs under administrator control. =item B @@ -787,9 +790,8 @@ =item B -(PV only) Changes the default value of 'permissive' for all PCI -devices passed through to this VM. See L -above. +Changes the default value of 'permissive' for all PCI devices passed +through to this VM. See L above. =item B diff -Nru xen-4.6.0/docs/misc/xen-command-line.markdown xen-4.6.5/docs/misc/xen-command-line.markdown --- xen-4.6.0/docs/misc/xen-command-line.markdown 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/docs/misc/xen-command-line.markdown 2017-03-07 16:19:05.000000000 +0000 @@ -736,13 +736,18 @@ ### gnttab\_max\_frames > `= ` -Specify the maximum number of frames per grant table operation. +> Default: `32` + +Specify the maximum number of frames which any domain may use as part +of its grant table. ### gnttab\_max\_maptrack\_frames > `= ` -Specify the maximum number of maptrack frames domain. -The default value is 8 times **gnttab\_max\_frames**. +> Default: `8 * gnttab_max_frames` + +Specify the maximum number of frames to use as part of a domains +maptrack array. ### gnttab\_max\_nr\_frames > `= ` @@ -1028,6 +1033,17 @@ Specify the threshold below which Xen will inform dom0 that the quantity of free memory is getting low. 
Specifying `0` will disable this notification. +### memop-max-order +> `= [][,[][,[][,]]]` + +> x86 default: `9,18,12,12` +> ARM default: `9,18,10,10` + +Change the maximum order permitted for allocation (or allocation-like) +requests issued by the various kinds of domains (in this order: +ordinary DomU, control domain, hardware domain, and - when supported +by the platform - DomU with pass-through device assigned). + ### max\_cstate > `= ` @@ -1064,6 +1080,15 @@ Specify if the MMConfig space should be enabled. +### mmio-relax +> `= | all` + +> Default: `false` + +By default, domains may not create cached mappings to MMIO regions. +This option relaxes the check for Domain 0 (or when using `all`, all PV +domains), to permit the use of cacheable MMIO mappings. + ### msi > `= ` @@ -1463,8 +1488,8 @@ Note that if **watchdog** option is also specified vpmu will be turned off. *Warning:* -As the BTS virtualisation is not 100% safe and because of the nehalem quirk -don't use the vpmu flag on production systems with Intel cpus! +As the virtualisation is not 100% safe, don't use the vpmu flag on +production systems (see http://xenbits.xen.org/xsa/advisory-163.html)! ### watchdog > `= force | ` diff -Nru xen-4.6.0/docs/misc/xenstore-paths.markdown xen-4.6.5/docs/misc/xenstore-paths.markdown --- xen-4.6.0/docs/misc/xenstore-paths.markdown 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/docs/misc/xenstore-paths.markdown 2017-03-07 16:19:05.000000000 +0000 @@ -240,7 +240,7 @@ A secondary PV console device. Described in [console.txt](console.txt) -#### ~/device/serial/$DEVID/* [HVM] +#### ~/serial/$DEVID/* [HVM] An emulated serial device. Described in [console.txt](console.txt) @@ -389,6 +389,25 @@ ### libxl Specific Paths +#### /libxl/$DOMID/device/$KIND/$DEVID + +Created by libxl for every frontend/backend pair created for $DOMID. +Used by libxl for enumeration and management of the device. + +#### /libxl/$DOMID/device/$KIND/$DEVID/frontend + +Path in xenstore to the frontend, normally +/local/domain/$DOMID/device/$KIND/$DEVID + +#### /libxl/$DOMID/device/$KIND/$DEVID/backend + +Path in xenstore to the backend, normally +/local/domain/$BACKEND_DOMID/backend/$KIND/$DOMID/$DEVID + +#### /libxl/$DOMID/device/$KIND/$DEVID/$NODE + +Trustworthy copy of /local/domain/$DOMID/backend/$KIND/$DEVID/$NODE. + #### /libxl/$DOMID/dm-version ("qemu\_xen"|"qemu\_xen\_traditional") = [n,INTERNAL] The device model version for a domain. diff -Nru xen-4.6.0/extras/mini-os/app.lds xen-4.6.5/extras/mini-os/app.lds --- xen-4.6.0/extras/mini-os/app.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/app.lds 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,11 @@ +SECTIONS +{ + .app.bss : { + __app_bss_start = . ; + *(.bss .bss.*) + *(COMMON) + *(.lbss .lbss.*) + *(LARGE_COMMON) + __app_bss_end = . ; + } +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/arm32.S xen-4.6.5/extras/mini-os/arch/arm/arm32.S --- xen-4.6.0/extras/mini-os/arch/arm/arm32.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/arm32.S 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,294 @@ +@ Offset of the kernel within the RAM. This is a Linux/zImage convention which we +@ rely on for now. 
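+@ (0x8000, defined below, is 32 KB: as minios-arm32.lds notes, the 32 KB
+@ below the loaded image are reused for the 16 KB boot stack and the
+@ 16 KB translation table.)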
+#define ZIMAGE_KERNEL_OFFSET 0x8000 + +.section .text + +.globl _start +_start: + @ zImage header +.rept 8 + mov r0, r0 +.endr + b reset + .word 0x016f2818 @ Magic numbers to help the loader + .word 0 @ zImage start address (0 = relocatable) + .word _edata - _start @ zImage end address (excludes bss section) + @ end of zImage header + +@ Called at boot time. Sets up MMU, exception vectors and stack, and then calls C arch_init() function. +@ => r2 -> DTB +@ <= never returns +@ Note: this boot code needs to be within the first (1MB - ZIMAGE_KERNEL_OFFSET) of _start. +reset: + @ Problem: the C code wants to be at a known address (_start), but Xen might + @ load us anywhere. We initialise the MMU (mapping virtual to physical @ addresses) + @ so everything ends up where the code expects it to be. + @ + @ We calculate the offet between where the linker thought _start would be and where + @ it actually is and initialise the page tables to have that offset for every page. + @ + @ When we turn on the MMU, we're still executing at the old address. We don't want + @ the code to disappear from under us. So we have to do the mapping in stages: + @ + @ 1. set up a mapping to our current page from both its current and desired addresses + @ 2. enable the MMU + @ 3. jump to the new address + @ 4. remap all the other pages with the calculated offset + + adr r1, _start @ r1 = physical address of _start + ldr r3, =_start @ r3 = (desired) virtual address of _start + sub r9, r1, r3 @ r9 = (physical - virtual) offset + + ldr r7, =_page_dir @ r7 = (desired) virtual addr of translation table + add r1, r7, r9 @ r1 = physical addr of translation table + + @ Tell the system where our page table is located. + @ This is the 16 KB top-level translation table, in which + @ each word maps one 1MB virtual section to a physical section. + @ Note: We leave TTBCR as 0, meaning that only TTBR0 is used and + @ we use the short-descriptor format (32-bit physical addresses). + orr r0, r1, #0b0001011 @ Sharable, Inner/Outer Write-Back Write-Allocate Cacheable + mcr p15, 0, r0, c2, c0, 0 @ set TTBR0 + + @ Set access permission for domains. + @ Domains are deprecated, but we have to configure them anyway. + @ We mark every page as being domain 0 and set domain 0 to "client mode" + @ (client mode = use access flags in page table). + mov r0, #1 @ 1 = client + mcr p15, 0, r0, c3, c0, 0 @ DACR + + @ Template (flags) for a 1 MB page-table entry. + @ TEX[2:0] C B = 001 1 1 (outer and inner write-back, write-allocate) + ldr r8, =(0x2 + /* Section entry */ \ + 0xc + /* C B */ \ + (3 << 10) + /* Read/write */ \ + (1 << 12) + /* TEX */ \ + (1 << 16) + /* Sharable */ \ + (1<<19)) /* Non-secure */ + @ r8 = template page table entry + + @ Add an entry for the current physical section, at the old and new + @ addresses. It's OK if they're the same. 
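+ @ (lsr #20 then lsl #20 rounds pc down to its 1 MB section base. The
+ @ lsr #18 in the str instructions turns an address into the byte offset
+ @ of its section's 4-byte table entry -- section number times 4 --
+ @ which is exact here because bits 18-19 of both addresses are clear.)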
+ mov r0, pc, lsr#20 + mov r0, r0, lsl#20 @ r0 = physical address of this code's section start + orr r3, r0, r8 @ r3 = table entry for this section + ldr r4, =_start @ r4 = desired virtual address of this section + str r3, [r1, r4, lsr#18] @ map desired virtual section to this code + str r3, [r1, r0, lsr#18] @ map current section to this code too + + @ Invalidate TLB + dsb @ Caching is off, but must still prevent reordering + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + + @ Enable MMU / SCTLR + mrc p15, 0, r1, c1, c0, 0 @ SCTLR + orr r1, r1, #3 << 11 @ enable icache, branch prediction + orr r1, r1, #4 + 1 @ enable dcache, MMU + mcr p15, 0, r1, c1, c0, 0 @ SCTLR + isb + + ldr r1, =stage2 @ Virtual address of stage2 + bx r1 + +@ Called once the MMU is enabled. The boot code and the page table are mapped, +@ but nothing else is yet. +@ +@ => r2 -> dtb (physical) +@ r7 = virtual address of page table +@ r8 = section entry template (flags) +@ r9 = desired physical - virtual offset +@ pc -> somewhere in newly-mapped virtual code section +stage2: + @ Invalidate TLB + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + isb + + @ The new mapping has now taken effect: + @ r7 -> page_dir + + @ Fill in the whole top-level translation table (at page_dir). + @ Populate the whole pagedir with 1MB section descriptors. + + mov r1, r7 @ r1 -> first section entry + add r3, r1, #4*4*1024 @ limit (4 GB address space, 4 byte entries) + orr r0, r8, r9 @ r0 = entry mapping section zero to start of physical RAM +1: + str r0, [r1],#4 @ write the section entry + add r0, r0, #1 << 20 @ next physical page (wraps) + cmp r1, r3 + bne 1b + + @ Invalidate TLB + dsb + mcr p15, 0, r1, c8, c7, 0 @ TLBIALL + isb + + @ Set VBAR -> exception_vector_table + @ SCTLR.V = 0 + adr r0, exception_vector_table + mcr p15, 0, r0, c12, c0, 0 + + @ Enable hardware floating point: + @ 1. Access to CP10 and CP11 must be enabled in the Coprocessor Access + @ Control Register (CP15.CACR): + mrc p15, 0, r1, c1, c0, 2 @ CACR + orr r1, r1, #(3 << 20) + (3 << 22) @ full access for CP10 & CP11 + mcr p15, 0, r1, c1, c0, 2 + @ 2. The EN bit in the FPEXC register must be set: + vmrs r0, FPEXC + orr r0, r0, #1<<30 @ EN (enable) + vmsr FPEXC, r0 + + @ Initialise 16 KB stack + ldr sp, =_boot_stack_end + + sub r0, r2, r9 @ r0 -> device tree (virtual address) + mov r1, r9 @ r1 = physical_address_offset + + b arch_init + +.pushsection .bss +@ Note: calling arch_init zeroes out this region. +.align 12 +.globl shared_info_page +shared_info_page: + .fill (1024), 4, 0x0 + +.align 3 +.globl irqstack +.globl irqstack_end +irqstack: + .fill (1024), 4, 0x0 +irqstack_end: + +fault_dump: + .fill 18, 4, 0x0 @ On fault, we save the registers + CPSR + handler address + +.popsection + +fault: + cpsid aif @ Disable interrupts + + ldr r13, =fault_dump + stmia r13, {r0-r12} @ Dump the non-banked registers directly (well, unless from FIQ mode) + str r14, [r13, #15 << 2] @ Our r14 is the faulting r15 + mov r0, r13 + + @ Save the caller's CPSR (our SPSR) too. + mrs r1, SPSR + str r1, [r13, #16 << 2] + + @ Switch to the mode we came from to get r13 and r14. + @ If coming from user mode, use System mode instead so we're still + @ privileged. 
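+ @ (Mode encodings: 0x10 is User, 0x1f is System. System mode shares
+ @ User mode's r13/r14 while remaining privileged, which is why it can
+ @ stand in for User mode here.)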
+ and r1, r1, #0x1f @ r1 = SPSR mode + cmp r1, #0x10 @ If from User mode + moveq r1, #0x1f @ Then use System mode instead + + mrs r3, CPSR @ r3 = our CPSR + bic r2, r3, #0x1f + orr r2, r2, r1 + msr CPSR, r2 @ Change to mode r1 + + @ Save old mode's r13, r14 + str r13, [r0, #13 << 2] + str r14, [r0, #14 << 2] + + msr CPSR, r3 @ Back to fault mode + + ldr r1, [r0, #17 << 2] + sub r1, r1, #12 @ Fix to point at start of handler + str r1, [r0, #17 << 2] + + @ Call C code to format the register dump. + @ Clobbers the stack, but we're not going to return anyway. + ldr sp, =_boot_stack_end + bl dump_registers + b do_exit + +@ We want to store a unique value to identify this handler, without corrupting +@ any of the registers. So, we store r15 (which will point just after the branch). +@ Later, we subtract 12 so the user gets pointed at the start of the exception +@ handler. +#define FAULT(name) \ +.globl fault_##name; \ +fault_##name: \ + ldr r13, =fault_dump; \ + str r15, [r13, #17 << 2]; \ + b fault + +FAULT(reset) +FAULT(undefined_instruction) +FAULT(svc) +FAULT(prefetch_call) +FAULT(prefetch_abort) +FAULT(data_abort) + +@ exception base address +.align 5 +.globl exception_vector_table +@ Note: remember to call CLREX if returning from an exception: +@ "The architecture enables the local monitor to treat any exclusive store as +@ matching a previous LDREX address. For this reason, use of the CLREX +@ instruction to clear an existing tag is required on context switches." +@ -- ARM Cortex-A Series Programmer’s Guide (Version: 4.0) +exception_vector_table: + b fault_reset + b fault_undefined_instruction + b fault_svc + b fault_prefetch_call + b fault_prefetch_abort + b fault_data_abort + b irq_handler @ IRQ + .word 0xe7f000f0 @ abort on FIQ + +@ Call fault_undefined_instruction in "Undefined mode" +bug: + .word 0xe7f000f0 @ und/udf - a "Permanently Undefined" instruction + +irq_handler: + ldr sp, =irqstack_end + push {r0 - r12, r14} + + ldr r0, IRQ_handler + cmp r0, #0 + beq bug + blx r0 @ call handler + + @ Return from IRQ + pop {r0 - r12, r14} + clrex + subs pc, lr, #4 + +.globl IRQ_handler +IRQ_handler: + .long 0x0 + + +.globl __arch_switch_threads +@ => r0 = &prev->sp +@ r1 = &next->sp +@ <= returns to next thread's saved return address +__arch_switch_threads: + push {r4-r11} @ Store callee-saved registers to old thread's stack + stmia r0, {sp, lr} @ Store current sp and ip to prev's struct thread + + ldmia r1, {sp, lr} @ Load new sp, ip from next's struct thread + pop {r4-r11} @ Load callee-saved registers from new thread's stack + + bx lr + +@ This is called if you try to divide by zero. For now, we make a supervisor call, +@ which will make us halt. 
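+@ (With ARM EABI toolchains, libgcc's integer-division helpers typically
+@ call raise() on a zero divisor, which is presumably what lands here.)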
+.globl raise +raise: + svc 0 + +.globl arm_start_thread +arm_start_thread: + pop {r0, r1} + @ r0 = user data + @ r1 -> thread's main function + ldr lr, =exit_thread + bx r1 diff -Nru xen-4.6.0/extras/mini-os/arch/arm/events.c xen-4.6.5/extras/mini-os/arch/arm/events.c --- xen-4.6.0/extras/mini-os/arch/arm/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/events.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +static void virq_debug(evtchn_port_t port, struct pt_regs *regs, void *params) +{ + printk("Received a virq_debug event\n"); +} + +evtchn_port_t debug_port = -1; +void arch_init_events(void) +{ + debug_port = bind_virq(VIRQ_DEBUG, (evtchn_handler_t)virq_debug, 0); + if(debug_port == -1) + BUG(); + unmask_evtchn(debug_port); +} + +void arch_unbind_ports(void) +{ + if(debug_port != -1) + { + mask_evtchn(debug_port); + unbind_evtchn(debug_port); + } +} + +void arch_fini_events(void) +{ +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/hypercalls32.S xen-4.6.5/extras/mini-os/arch/arm/hypercalls32.S --- xen-4.6.0/extras/mini-os/arch/arm/hypercalls32.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/hypercalls32.S 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,64 @@ +/****************************************************************************** + * hypercall.S + * + * Xen hypercall wrappers + * + * Stefano Stabellini , Citrix, 2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#define __HVC(imm16) .long ((0xE1400070 | (((imm16) & 0xFFF0) << 4) | ((imm16) & 0x000F)) & 0xFFFFFFFF) + +#define XEN_IMM 0xEA1 + +#define HYPERCALL_SIMPLE(hypercall) \ +.globl HYPERVISOR_##hypercall; \ +.align 4,0x90; \ +HYPERVISOR_##hypercall: \ + mov r12, #__HYPERVISOR_##hypercall; \ + __HVC(XEN_IMM); \ + mov pc, lr; + +#define _hypercall0 HYPERCALL_SIMPLE +#define _hypercall1 HYPERCALL_SIMPLE +#define _hypercall2 HYPERCALL_SIMPLE +#define _hypercall3 HYPERCALL_SIMPLE +#define _hypercall4 HYPERCALL_SIMPLE + +_hypercall2(sched_op); +_hypercall2(memory_op); +_hypercall2(event_channel_op); +_hypercall2(xen_version); +_hypercall3(console_io); +_hypercall1(physdev_op); +_hypercall3(grant_table_op); +_hypercall3(vcpu_op); +_hypercall1(sysctl); +_hypercall1(domctl); +_hypercall2(hvm_op); +_hypercall1(xsm_op); diff -Nru xen-4.6.0/extras/mini-os/arch/arm/minios-arm32.lds xen-4.6.5/extras/mini-os/arch/arm/minios-arm32.lds --- xen-4.6.0/extras/mini-os/arch/arm/minios-arm32.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/minios-arm32.lds 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,83 @@ +OUTPUT_ARCH(arm) +ENTRY(_start) +SECTIONS +{ + /* Note: we currently assume that Xen will load the kernel image + * at start-of-RAM + 0x8000. We use this initial 32 KB for the stack + * and translation tables. + */ + _boot_stack = 0x400000; /* 16 KB boot stack */ + _boot_stack_end = 0x404000; + _page_dir = 0x404000; /* 16 KB translation table */ + . = 0x408000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + . = ALIGN(4096); + _erodata = .; + + /* newlib initialization functions */ + . = ALIGN(32 / 8); + PROVIDE (__preinit_array_start = .); + .preinit_array : { *(.preinit_array) } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { *(.init_array) } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { *(.fini_array) } + PROVIDE (__fini_array_end = .); + + .ctors : { + __CTOR_LIST__ = .; + *(.ctors) + CONSTRUCTORS + LONG(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + *(.dtors) + LONG(0) + __DTOR_END__ = .; + } + + .data : { /* Data */ + *(.data) + } + + /* Note: linker will insert any extra sections here, just before .bss */ + + .bss : { + _edata = .; /* End of data included in image */ + /* Nothing after here is included in the zImage's size */ + + __bss_start = .; + *(.bss) + *(.app.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
*/ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/mm.c xen-4.6.5/extras/mini-os/arch/arm/mm.c --- xen-4.6.0/extras/mini-os/arch/arm/mm.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/mm.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include +#include + +uint32_t physical_address_offset; + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment) +{ + // FIXME + BUG(); +} + +void arch_init_mm(unsigned long *start_pfn_p, unsigned long *max_pfn_p) +{ + int memory; + int prop_len = 0; + const uint64_t *regs; + + printk(" _text: %p(VA)\n", &_text); + printk(" _etext: %p(VA)\n", &_etext); + printk(" _erodata: %p(VA)\n", &_erodata); + printk(" _edata: %p(VA)\n", &_edata); + printk(" stack start: %p(VA)\n", _boot_stack); + printk(" _end: %p(VA)\n", &_end); + + if (fdt_num_mem_rsv(device_tree) != 0) + printk("WARNING: reserved memory not supported!\n"); + + memory = fdt_node_offset_by_prop_value(device_tree, -1, "device_type", "memory", sizeof("memory")); + if (memory < 0) { + printk("No memory found in FDT!\n"); + BUG(); + } + + /* Xen will always provide us at least one bank of memory. + * Mini-OS will use the first bank for the time-being. */ + regs = fdt_getprop(device_tree, memory, "reg", &prop_len); + + /* The property must contain at least the start address + * and size, each of which is 8-bytes. */ + if (regs == NULL || prop_len < 16) { + printk("Bad 'reg' property: %p %d\n", regs, prop_len); + BUG(); + } + + unsigned int end = (unsigned int) &_end; + paddr_t mem_base = fdt64_to_cpu(regs[0]); + uint64_t mem_size = fdt64_to_cpu(regs[1]); + printk("Found memory at 0x%llx (len 0x%llx)\n", + (unsigned long long) mem_base, (unsigned long long) mem_size); + + BUG_ON(to_virt(mem_base) > (void *) &_text); /* Our image isn't in our RAM! */ + *start_pfn_p = PFN_UP(to_phys(end)); + uint64_t heap_len = mem_size - (PFN_PHYS(*start_pfn_p) - mem_base); + *max_pfn_p = *start_pfn_p + PFN_DOWN(heap_len); + + printk("Using pages %lu to %lu as free space for heap.\n", *start_pfn_p, *max_pfn_p); + + /* The device tree is probably in memory that we're about to hand over to the page + * allocator, so move it to the end and reserve that space. + */ + uint32_t fdt_size = fdt_totalsize(device_tree); + void *new_device_tree = to_virt(((*max_pfn_p << PAGE_SHIFT) - fdt_size) & PAGE_MASK); + if (new_device_tree != device_tree) { + memmove(new_device_tree, device_tree, fdt_size); + } + device_tree = new_device_tree; + *max_pfn_p = to_phys(new_device_tree) >> PAGE_SHIFT; +} + +void arch_init_p2m(unsigned long max_pfn) +{ +} + +void arch_init_demand_mapping_area(unsigned long cur_pfn) +{ +} + +/* Get Xen's suggested physical page assignments for the grant table. */ +static paddr_t get_gnttab_base(void) +{ + int hypervisor; + int len = 0; + const uint64_t *regs; + paddr_t gnttab_base; + + hypervisor = fdt_node_offset_by_compatible(device_tree, -1, "xen,xen"); + BUG_ON(hypervisor < 0); + + regs = fdt_getprop(device_tree, hypervisor, "reg", &len); + /* The property contains the address and size, 8-bytes each. 
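+ * (Hence the length check below: at least 16 bytes, one 64-bit address
+ * plus one 64-bit size.)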
*/ + if (regs == NULL || len < 16) { + printk("Bad 'reg' property: %p %d\n", regs, len); + BUG(); + } + + gnttab_base = fdt64_to_cpu(regs[0]); + + printk("FDT suggests grant table base %llx\n", (unsigned long long) gnttab_base); + + return gnttab_base; +} + +grant_entry_t *arch_init_gnttab(int nr_grant_frames) +{ + struct xen_add_to_physmap xatp; + struct gnttab_setup_table setup; + xen_pfn_t frames[nr_grant_frames]; + paddr_t gnttab_table; + int i, rc; + + gnttab_table = get_gnttab_base(); + + for (i = 0; i < nr_grant_frames; i++) + { + xatp.domid = DOMID_SELF; + xatp.size = 0; /* Seems to be unused */ + xatp.space = XENMAPSPACE_grant_table; + xatp.idx = i; + xatp.gpfn = (gnttab_table >> PAGE_SHIFT) + i; + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); + BUG_ON(rc != 0); + } + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_grant_frames; + set_xen_guest_handle(setup.frame_list, frames); + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (setup.status != 0) + { + printk("GNTTABOP_setup_table failed; status = %d\n", setup.status); + BUG(); + } + + return to_virt(gnttab_table); +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/panic.c xen-4.6.5/extras/mini-os/arch/arm/panic.c --- xen-4.6.0/extras/mini-os/arch/arm/panic.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/panic.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,98 @@ +/****************************************************************************** + * panic.c + * + * Displays a register dump and stack trace for debugging. + * + * Copyright (c) 2014, Thomas Leonard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +extern int irqstack[]; +extern int irqstack_end[]; + +typedef void handler(void); + +extern handler fault_reset; +extern handler fault_undefined_instruction; +extern handler fault_svc; +extern handler fault_prefetch_call; +extern handler fault_prefetch_abort; +extern handler fault_data_abort; + +void dump_registers(int *saved_registers) { + static int in_dump = 0; + int *sp, *stack_top, *x; + char *fault_name; + void *fault_handler; + int i; + + if (in_dump) + { + printk("Crash while in dump_registers! 
Not generating a second report.\n"); + return; + } + + in_dump = 1; + + fault_handler = (handler *) saved_registers[17]; + if (fault_handler == fault_reset) + fault_name = "reset"; + else if (fault_handler == fault_undefined_instruction) + fault_name = "undefined_instruction"; + else if (fault_handler == fault_svc) + fault_name = "svc"; + else if (fault_handler == fault_prefetch_call) + fault_name = "prefetch_call"; + else if (fault_handler == fault_prefetch_abort) + fault_name = "prefetch_abort"; + else if (fault_handler == fault_data_abort) + fault_name = "data_abort"; + else + fault_name = "unknown fault type!"; + + printk("Fault handler at %p called (%s)\n", fault_handler, fault_name); + + for (i = 0; i < 16; i++) { + printk("r%d = %x\n", i, saved_registers[i]); + } + printk("CPSR = %x\n", saved_registers[16]); + + printk("Stack dump (innermost last)\n"); + sp = (int *) saved_registers[13]; + + if (sp >= _boot_stack && sp <= _boot_stack_end) + stack_top = _boot_stack_end; /* The boot stack */ + else if (sp >= irqstack && sp <= irqstack_end) + stack_top = irqstack_end; /* The IRQ stack */ + else + stack_top = (int *) ((((unsigned long) sp) | (__STACK_SIZE-1)) + 1); /* A normal thread stack */ + + for (x = stack_top - 1; x >= sp; x--) + { + printk(" [%8p] %8x\n", x, *x); + } + printk("End of stack\n"); + + in_dump = 0; +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/sched.c xen-4.6.5/extras/mini-os/arch/arm/sched.c --- xen-4.6.0/extras/mini-os/arch/arm/sched.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/sched.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,47 @@ +#include +#include +#include + +void arm_start_thread(void); + +/* The AAPCS requires the callee (e.g. __arch_switch_threads) to preserve r4-r11. */ +#define CALLEE_SAVED_REGISTERS 8 + +/* Architecture specific setup of thread creation */ +struct thread* arch_create_thread(char *name, void (*function)(void *), + void *data) +{ + struct thread *thread; + + thread = xmalloc(struct thread); + /* We can't use lazy allocation here since the trap handler runs on the stack */ + thread->stack = (char *)alloc_pages(STACK_SIZE_PAGE_ORDER); + thread->name = name; + printk("Thread \"%s\": pointer: 0x%p, stack: 0x%p\n", name, thread, + thread->stack); + + /* Save pointer to the thread on the stack, used by current macro */ + *((unsigned long *)thread->stack) = (unsigned long)thread; + + /* Push the details to pass to arm_start_thread onto the stack. */ + int *sp = (int *) (thread->stack + STACK_SIZE); + *(--sp) = (int) function; + *(--sp) = (int) data; + + /* We leave room for the 8 callee-saved registers which we will + * try to restore on thread switch, even though they're not needed + * for the initial switch. */ + thread->sp = (unsigned long) sp - 4 * CALLEE_SAVED_REGISTERS; + + thread->ip = (unsigned long) arm_start_thread; + + return thread; +} + +void run_idle_thread(void) +{ + __asm__ __volatile__ ("mov sp, %0; bx %1":: + "r"(idle_thread->sp + 4 * CALLEE_SAVED_REGISTERS), + "r"(idle_thread->ip)); + /* Never arrive here! 
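+ * The bx above enters the idle thread at its saved ip with its stack
+ * already installed, so control cannot return to this frame.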
*/ +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/setup.c xen-4.6.5/extras/mini-os/arch/arm/setup.c --- xen-4.6.0/extras/mini-os/arch/arm/setup.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/setup.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,119 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This structure contains start-of-day info, such as pagetable base pointer, + * address of the shared_info structure, and things like that. + * On x86, the hypervisor passes it to us. On ARM, we fill it in ourselves. + */ +union start_info_union start_info_union; + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +shared_info_t *HYPERVISOR_shared_info; + +extern char shared_info_page[PAGE_SIZE]; + +void *device_tree; + +static int hvm_get_parameter(int idx, uint64_t *value) +{ + struct xen_hvm_param xhv; + int ret; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (ret < 0) { + BUG(); + } + *value = xhv.value; + return ret; +} + +static void get_console(void) +{ + uint64_t v = -1; + + hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); + start_info.console.domU.evtchn = v; + + hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + start_info.console.domU.mfn = v; + + printk("Console is on port %d\n", start_info.console.domU.evtchn); + printk("Console ring is at mfn %lx\n", (unsigned long) start_info.console.domU.mfn); +} + +void get_xenbus(void) +{ + uint64_t value; + + if (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &value)) + BUG(); + + start_info.store_evtchn = (int)value; + + if(hvm_get_parameter(HVM_PARAM_STORE_PFN, &value)) + BUG(); + start_info.store_mfn = (unsigned long)value; +} + +/* + * INITIAL C ENTRY POINT. + */ +void arch_init(void *dtb_pointer, uint32_t physical_offset) +{ + struct xen_add_to_physmap xatp; + int r; + + memset(&__bss_start, 0, &_end - &__bss_start); + + physical_address_offset = physical_offset; + + xprintk("Virtual -> physical offset = %x\n", physical_address_offset); + + xprintk("Checking DTB at %p...\n", dtb_pointer); + + if ((r = fdt_check_header(dtb_pointer))) { + xprintk("Invalid DTB from Xen: %s\n", fdt_strerror(r)); + BUG(); + } + device_tree = dtb_pointer; + + /* Map shared_info page */ + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = virt_to_pfn(shared_info_page); + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0) + BUG(); + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; + + /* Fill in start_info */ + get_console(); + get_xenbus(); + + gic_init(); + + start_kernel(); +} + +void +arch_fini(void) +{ +} + +void +arch_do_exit(void) +{ +} diff -Nru xen-4.6.0/extras/mini-os/arch/arm/time.c xen-4.6.5/extras/mini-os/arch/arm/time.c --- xen-4.6.0/extras/mini-os/arch/arm/time.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/arm/time.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,136 @@ +#include +#include +#include +#include +#include +#include +#include + +//#define VTIMER_DEBUG +#ifdef VTIMER_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=vtimer.c, line=%d) " _f , __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
((void)0) +#endif + +/************************************************************************ + * Time functions + *************************************************************************/ + +static uint64_t cntvct_at_init; +static uint32_t counter_freq; + +/* Compute with 96 bit intermediate result: (a*b)/c */ +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { + uint32_t low, high; + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} + +static inline s_time_t ticks_to_ns(uint64_t ticks) +{ + return muldiv64(ticks, SECONDS(1), counter_freq); +} + +static inline uint64_t ns_to_ticks(s_time_t ns) +{ + return muldiv64(ns, counter_freq, SECONDS(1)); +} + +/* Wall-clock time is not currently available on ARM, so this is always zero for now: + * http://wiki.xenproject.org/wiki/Xen_ARM_TODO#Expose_Wallclock_time_to_guests + */ +static struct timespec shadow_ts; + +static inline uint64_t read_virtual_count(void) +{ + uint32_t c_lo, c_hi; + __asm__ __volatile__("mrrc p15, 1, %0, %1, c14":"=r"(c_lo), "=r"(c_hi)); + return (((uint64_t) c_hi) << 32) + c_lo; +} + +/* monotonic_clock(): returns # of nanoseconds passed since time_init() + * Note: This function is required to return accurate + * time even in the absence of multiple timer ticks. + */ +uint64_t monotonic_clock(void) +{ + return ticks_to_ns(read_virtual_count() - cntvct_at_init); +} + +int gettimeofday(struct timeval *tv, void *tz) +{ + uint64_t nsec = monotonic_clock(); + nsec += shadow_ts.tv_nsec; + + tv->tv_sec = shadow_ts.tv_sec; + tv->tv_sec += NSEC_TO_SEC(nsec); + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); + + return 0; +} + +/* Set the timer and mask. */ +void write_timer_ctl(uint32_t value) { + __asm__ __volatile__( + "mcr p15, 0, %0, c14, c3, 1\n" + "isb"::"r"(value)); +} + +void set_vtimer_compare(uint64_t value) { + DEBUG("New CompareValue : %llx\n", value); + + __asm__ __volatile__("mcrr p15, 3, %0, %H0, c14" + ::"r"(value)); + + /* Enable timer and unmask the output signal */ + write_timer_ctl(1); +} + +void unset_vtimer_compare(void) { + /* Disable timer and mask the output signal */ + write_timer_ctl(2); +} + +void block_domain(s_time_t until) +{ + uint64_t until_count = ns_to_ticks(until) + cntvct_at_init; + ASSERT(irqs_disabled()); + if (read_virtual_count() < until_count) + { + set_vtimer_compare(until_count); + __asm__ __volatile__("wfi"); + unset_vtimer_compare(); + + /* Give the IRQ handler a chance to handle whatever woke us up. */ + local_irq_enable(); + local_irq_disable(); + } +} + +void init_time(void) +{ + printk("Initialising timer interface\n"); + + __asm__ __volatile__("mrc p15, 0, %0, c14, c0, 0":"=r"(counter_freq)); + cntvct_at_init = read_virtual_count(); + printk("Virtual Count register is %llx, freq = %d Hz\n", cntvct_at_init, counter_freq); +} + +void fini_time(void) +{ +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/arch.mk xen-4.6.5/extras/mini-os/arch/x86/arch.mk --- xen-4.6.0/extras/mini-os/arch/x86/arch.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/arch.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,22 @@ +# +# Architecture special makerules for x86 family +# (including x86_32, x86_32y and x86_64). 
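+# (The -mno-red-zone below matters for x86_64: event and interrupt frames
+# are pushed onto the current stack, so code must not keep live data
+# below %rsp.)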
+# + +ifeq ($(MINIOS_TARGET_ARCH),x86_32) +ARCH_CFLAGS := -m32 -march=i686 +ARCH_LDFLAGS := -m elf_i386 +ARCH_ASFLAGS := -m32 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(MINIOS_TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(MINIOS_TARGET_ARCH),x86_64) +ARCH_CFLAGS := -m64 -mno-red-zone -fno-reorder-blocks +ARCH_CFLAGS += -fno-asynchronous-unwind-tables +ARCH_ASFLAGS := -m64 +ARCH_LDFLAGS := -m elf_x86_64 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(MINIOS_TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + diff -Nru xen-4.6.0/extras/mini-os/arch/x86/events.c xen-4.6.5/extras/mini-os/arch/x86/events.c --- xen-4.6.0/extras/mini-os/arch/x86/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/events.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,35 @@ +#include +#include +#include + +#if defined(__x86_64__) +char irqstack[2 * STACK_SIZE]; + +static struct pda +{ + int irqcount; /* offset 0 (used in x86_64.S) */ + char *irqstackptr; /* 8 */ +} cpu0_pda; +#endif + +void arch_init_events(void) +{ +#if defined(__x86_64__) + asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + wrmsrl(0xc0000101, &cpu0_pda); /* 0xc0000101 is MSR_GS_BASE */ + cpu0_pda.irqcount = -1; + cpu0_pda.irqstackptr = (void*) (((unsigned long)irqstack + 2 * STACK_SIZE) + & ~(STACK_SIZE - 1)); +#endif +} + +void arch_unbind_ports(void) +{ +} + +void arch_fini_events(void) +{ +#if defined(__x86_64__) + wrmsrl(0xc0000101, NULL); /* 0xc0000101 is MSR_GS_BASE */ +#endif +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/ioremap.c xen-4.6.5/extras/mini-os/arch/x86/ioremap.c --- xen-4.6.0/extras/mini-os/arch/x86/ioremap.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/ioremap.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2009, Netronome Systems, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include +#include +#include + +/* Map a physical address range into virtual address space with provided + * flags. Return a virtual address range it is mapped to. 
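+ * Non-page-aligned inputs are accepted: the range is widened to whole
+ * pages for the mapping and the in-page offset is added back to the
+ * address returned.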
*/ +static void *__do_ioremap(unsigned long phys_addr, unsigned long size, + unsigned long prot) +{ + unsigned long va; + unsigned long mfns, mfn; + unsigned long num_pages, offset; + + /* allow non page aligned addresses but for mapping we need to align them */ + offset = (phys_addr & ~PAGE_MASK); + num_pages = (offset + size + PAGE_SIZE - 1) / PAGE_SIZE; + phys_addr &= PAGE_MASK; + mfns = mfn = phys_addr >> PAGE_SHIFT; + + va = (unsigned long)map_frames_ex(&mfns, num_pages, 0, 1, 1, + DOMID_IO, NULL, prot); + return (void *)(va + offset); +} + +void *ioremap(unsigned long phys_addr, unsigned long size) +{ + return __do_ioremap(phys_addr, size, IO_PROT); +} + +void *ioremap_nocache(unsigned long phys_addr, unsigned long size) +{ + return __do_ioremap(phys_addr, size, IO_PROT_NOCACHE); +} + +/* Un-map the io-remapped region. Currently no list of existing mappings is + * maintained, so the caller has to supply the size */ +void iounmap(void *virt_addr, unsigned long size) +{ + unsigned long num_pages; + unsigned long va = (unsigned long)virt_addr; + + /* work out number of frames to unmap */ + num_pages = ((va & ~PAGE_MASK) + size + PAGE_SIZE - 1) / PAGE_SIZE; + + unmap_frames(va & PAGE_MASK, num_pages); +} + + + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */ diff -Nru xen-4.6.0/extras/mini-os/arch/x86/iorw.c xen-4.6.5/extras/mini-os/arch/x86/iorw.c --- xen-4.6.0/extras/mini-os/arch/x86/iorw.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/iorw.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,35 @@ +#include + +void iowrite8(volatile void* addr, uint8_t val) +{ + *((volatile uint8_t*)addr) = val; +} +void iowrite16(volatile void* addr, uint16_t val) +{ + *((volatile uint16_t*)addr) = val; +} +void iowrite32(volatile void* addr, uint32_t val) +{ + *((volatile uint32_t*)addr) = val; +} +void iowrite64(volatile void* addr, uint64_t val) +{ + *((volatile uint64_t*)addr) = val; +} + +uint8_t ioread8(volatile void* addr) +{ + return *((volatile uint8_t*) addr); +} +uint16_t ioread16(volatile void* addr) +{ + return *((volatile uint16_t*) addr); +} +uint32_t ioread32(volatile void* addr) +{ + return *((volatile uint32_t*) addr); +} +uint64_t ioread64(volatile void* addr) +{ + return *((volatile uint64_t*) addr); +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/Makefile xen-4.6.5/extras/mini-os/arch/x86/Makefile --- xen-4.6.0/extras/mini-os/arch/x86/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/Makefile 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,30 @@ +# +# x86 architecture specific makefiles. +# It's is used for x86_32, x86_32y and x86_64 +# + +TOPLEVEL_DIR = $(CURDIR)/../.. +include ../../Config.mk + +# include arch.mk has to be before mini-os.mk! + +include arch.mk +include ../../minios.mk + +# Sources here are all *.c *.S without $(MINIOS_TARGET_ARCH).S +# This is handled in $(HEAD_ARCH_OBJ) +ARCH_SRCS := $(sort $(wildcard *.c)) + +# The objects built from the sources. +ARCH_OBJS := $(patsubst %.c,$(OBJ_DIR)/%.o,$(ARCH_SRCS)) + +all: $(OBJ_DIR)/$(ARCH_LIB) + +# $(HEAD_ARCH_OBJ) is only build here, needed on linking +# in ../../Makefile. 
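+# (It is a prerequisite of $(ARCH_LIB) but not archived into it -- the ar
+# line below only packs $(ARCH_OBJS) -- presumably so the top-level link
+# can place the head object explicitly.)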
+$(OBJ_DIR)/$(ARCH_LIB): $(ARCH_OBJS) $(OBJ_DIR)/$(HEAD_ARCH_OBJ) + $(AR) rv $(OBJ_DIR)/$(ARCH_LIB) $(ARCH_OBJS) + +clean: + rm -f $(OBJ_DIR)/$(ARCH_LIB) $(ARCH_OBJS) $(OBJ_DIR)/$(HEAD_ARCH_OBJ) + diff -Nru xen-4.6.0/extras/mini-os/arch/x86/minios-x86_32.lds xen-4.6.5/extras/mini-os/arch/x86/minios-x86_32.lds --- xen-4.6.0/extras/mini-os/arch/x86/minios-x86_32.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/minios-x86_32.lds 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,74 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0x0; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + . = ALIGN(4096); + _erodata = .; + + /* newlib initialization functions */ + . = ALIGN(32 / 8); + PROVIDE (__preinit_array_start = .); + .preinit_array : { *(.preinit_array) } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { *(.init_array) } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { *(.fini_array) } + PROVIDE (__fini_array_end = .); + + .ctors : { + __CTOR_LIST__ = .; + *(.ctors) + CONSTRUCTORS + LONG(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + *(.dtors) + LONG(0) + __DTOR_END__ = .; + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + *(.app.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/minios-x86_64.lds xen-4.6.5/extras/mini-os/arch/x86/minios-x86_64.lds --- xen-4.6.0/extras/mini-os/arch/x86/minios-x86_64.lds 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/minios-x86_64.lds 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,74 @@ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_start) +SECTIONS +{ + . = 0x0; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + . = ALIGN(4096); + _erodata = .; + + /* newlib initialization functions */ + . = ALIGN(64 / 8); + PROVIDE (__preinit_array_start = .); + .preinit_array : { *(.preinit_array) } + PROVIDE (__preinit_array_end = .); + PROVIDE (__init_array_start = .); + .init_array : { *(.init_array) } + PROVIDE (__init_array_end = .); + PROVIDE (__fini_array_start = .); + .fini_array : { *(.fini_array) } + PROVIDE (__fini_array_end = .); + + .ctors : { + __CTOR_LIST__ = .; + *(.ctors) + CONSTRUCTORS + QUAD(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + *(.dtors) + QUAD(0) + __DTOR_END__ = .; + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + *(.app.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
*/ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/mm.c xen-4.6.5/extras/mini-os/arch/x86/mm.c --- xen-4.6.0/extras/mini-os/arch/x86/mm.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/mm.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,957 @@ +/* + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: mm.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos + * + * Date: Aug 2003, chages Aug 2005 + * + * Environment: Xen Minimal OS + * Description: memory management related functions + * contains buddy page allocator from Xen. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef MM_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + +unsigned long *phys_to_machine_mapping; +unsigned long mfn_zero; +extern char stack[]; +extern void page_walk(unsigned long va); + +/* + * Make pt_pfn a new 'level' page table frame and hook it into the page + * table at offset in previous level MFN (pref_l_mfn). pt_pfn is a guest + * PFN. 
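+ * The ordering below matters for PV: the frame is zeroed, its own
+ * mapping is downgraded to read-only via mmu_update (Xen refuses to
+ * type a frame as a page table while writable mappings of it exist),
+ * and only then is it hooked into the parent table.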
+ */ +static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, + unsigned long offset, unsigned long level) +{ + pgentry_t *tab = (pgentry_t *)start_info.pt_base; + unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); + pgentry_t prot_e, prot_t; + mmu_update_t mmu_updates[1]; + int rc; + + prot_e = prot_t = 0; + DEBUG("Allocating new L%d pt frame for pfn=%lx, " + "prev_l_mfn=%lx, offset=%lx", + level, *pt_pfn, prev_l_mfn, offset); + + /* We need to clear the page, otherwise we might fail to map it + as a page table page */ + memset((void*) pt_page, 0, PAGE_SIZE); + + switch ( level ) + { + case L1_FRAME: + prot_e = L1_PROT; + prot_t = L2_PROT; + break; + case L2_FRAME: + prot_e = L2_PROT; + prot_t = L3_PROT; + break; +#if defined(__x86_64__) + case L3_FRAME: + prot_e = L3_PROT; + prot_t = L4_PROT; + break; +#endif + default: + printk("new_pt_frame() called with invalid level number %lu\n", level); + do_exit(); + break; + } + + /* Make PFN a page table page */ +#if defined(__x86_64__) + tab = pte_to_virt(tab[l4_table_offset(pt_page)]); +#endif + tab = pte_to_virt(tab[l3_table_offset(pt_page)]); + + mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + + sizeof(pgentry_t) * l1_table_offset(pt_page); + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | + (prot_e & ~_PAGE_RW); + + if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 ) + { + printk("ERROR: PTE for new page table page could not be updated\n"); + printk(" mmu_update failed with rc=%d\n", rc); + do_exit(); + } + + /* Hook the new page table page into the hierarchy */ + mmu_updates[0].ptr = + ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t; + + if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 ) + { + printk("ERROR: mmu_update failed with rc=%d\n", rc); + do_exit(); + } + + *pt_pfn += 1; +} + +/* + * Checks if a pagetable frame is needed at 'level' to map a given + * address. Note, this function is specific to the initial page table + * building. + */ +static int need_pt_frame(unsigned long va, int level) +{ + unsigned long hyp_virt_start = HYPERVISOR_VIRT_START; +#if defined(__x86_64__) + unsigned long hyp_virt_end = HYPERVISOR_VIRT_END; +#else + unsigned long hyp_virt_end = 0xffffffff; +#endif + + /* In general frames will _not_ be needed if they were already + allocated to map the hypervisor into our VA space */ +#if defined(__x86_64__) + if ( level == L3_FRAME ) + { + if ( l4_table_offset(va) >= + l4_table_offset(hyp_virt_start) && + l4_table_offset(va) <= + l4_table_offset(hyp_virt_end)) + return 0; + return 1; + } + else +#endif + + if ( level == L2_FRAME ) + { +#if defined(__x86_64__) + if ( l4_table_offset(va) >= + l4_table_offset(hyp_virt_start) && + l4_table_offset(va) <= + l4_table_offset(hyp_virt_end)) +#endif + if ( l3_table_offset(va) >= + l3_table_offset(hyp_virt_start) && + l3_table_offset(va) <= + l3_table_offset(hyp_virt_end)) + return 0; + + return 1; + } + else + /* Always need l1 frames */ + if ( level == L1_FRAME ) + return 1; + + printk("ERROR: Unknown frame level %d, hypervisor %lx,%lx\n", + level, hyp_virt_start, hyp_virt_end); + return -1; +} + +/* + * Build the initial pagetable. 
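+ * Maps every page from the end of the bootstrap tables up to *max_pfn,
+ * allocating fresh pt frames (consuming pfns from *start_pfn onwards)
+ * whenever an L3/L2/L1 boundary is crossed, and batching PTE writes
+ * into mmu_update calls of up to L1_PAGETABLE_ENTRIES entries.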
+ */ +static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn) +{ + unsigned long start_address, end_address; + unsigned long pfn_to_map, pt_pfn = *start_pfn; + static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; + unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + unsigned long offset; + int count = 0; + int rc; + + pfn_to_map = + (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES; + + if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) ) + { + printk("WARNING: Mini-OS trying to use Xen virtual space. " + "Truncating memory from %luMB to ", + ((unsigned long)pfn_to_virt(*max_pfn) - + (unsigned long)&_text)>>20); + *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE); + printk("%luMB\n", + ((unsigned long)pfn_to_virt(*max_pfn) - + (unsigned long)&_text)>>20); + } + + start_address = (unsigned long)pfn_to_virt(pfn_to_map); + end_address = (unsigned long)pfn_to_virt(*max_pfn); + + /* We worked out the virtual memory range to map, now mapping loop */ + printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address); + + while ( start_address < end_address ) + { + tab = (pgentry_t *)start_info.pt_base; + pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + +#if defined(__x86_64__) + offset = l4_table_offset(start_address); + /* Need new L3 pt frame */ + if ( !(start_address & L3_MASK) ) + if ( need_pt_frame(start_address, L3_FRAME) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT); +#endif + offset = l3_table_offset(start_address); + /* Need new L2 pt frame */ + if ( !(start_address & L2_MASK) ) + if ( need_pt_frame(start_address, L2_FRAME) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT); + offset = l2_table_offset(start_address); + /* Need new L1 pt frame */ + if ( !(start_address & L1_MASK) ) + if ( need_pt_frame(start_address, L1_FRAME) ) + new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME); + + page = tab[offset]; + pt_mfn = pte_to_mfn(page); + offset = l1_table_offset(start_address); + + mmu_updates[count].ptr = + ((pgentry_t)pt_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[count].val = + (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; + count++; + if ( count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn ) + { + rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF); + if ( rc < 0 ) + { + printk("ERROR: build_pagetable(): PTE could not be updated\n"); + printk(" mmu_update failed with rc=%d\n", rc); + do_exit(); + } + count = 0; + } + start_address += PAGE_SIZE; + } + + *start_pfn = pt_pfn; +} + +/* + * Mark portion of the address space read only. 
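+ * Walks [text, etext) page by page, clearing _PAGE_RW on each PTE in
+ * batched mmu_update calls; the page holding shared_info is skipped,
+ * and a full TLB flush is issued at the end.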
+ */ +extern struct shared_info shared_info; +static void set_readonly(void *text, void *etext) +{ + unsigned long start_address = + ((unsigned long) text + PAGE_SIZE - 1) & PAGE_MASK; + unsigned long end_address = (unsigned long) etext; + static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; + unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + unsigned long offset; + int count = 0; + int rc; + + printk("setting %p-%p readonly\n", text, etext); + + while ( start_address + PAGE_SIZE <= end_address ) + { + tab = (pgentry_t *)start_info.pt_base; + mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + +#if defined(__x86_64__) + offset = l4_table_offset(start_address); + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif + offset = l3_table_offset(start_address); + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); + offset = l2_table_offset(start_address); + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); + + offset = l1_table_offset(start_address); + + if ( start_address != (unsigned long)&shared_info ) + { + mmu_updates[count].ptr = + ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; + mmu_updates[count].val = tab[offset] & ~_PAGE_RW; + count++; + } + else + printk("skipped %lx\n", start_address); + + start_address += PAGE_SIZE; + + if ( count == L1_PAGETABLE_ENTRIES || + start_address + PAGE_SIZE > end_address ) + { + rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF); + if ( rc < 0 ) + { + printk("ERROR: set_readonly(): PTE could not be updated\n"); + do_exit(); + } + count = 0; + } + } + + { + mmuext_op_t op = { + .cmd = MMUEXT_TLB_FLUSH_ALL, + }; + int count; + HYPERVISOR_mmuext_op(&op, 1, &count, DOMID_SELF); + } +} + +/* + * A useful mem testing function. Write the address to every address in the + * range provided and read back the value. If verbose, print page walk to + * some VA + * + * If we get MEM_TEST_MAX_ERRORS we might as well stop + */ +#define MEM_TEST_MAX_ERRORS 10 +int mem_test(unsigned long *start_va, unsigned long *end_va, int verbose) +{ + unsigned long mask = 0x10000; + unsigned long *pointer; + int error_count = 0; + + /* write values and print page walks */ + if ( verbose && (((unsigned long)start_va) & 0xfffff) ) + { + printk("MemTest Start: 0x%p\n", start_va); + page_walk((unsigned long)start_va); + } + for ( pointer = start_va; pointer < end_va; pointer++ ) + { + if ( verbose && !(((unsigned long)pointer) & 0xfffff) ) + { + printk("Writing to %p\n", pointer); + page_walk((unsigned long)pointer); + } + *pointer = (unsigned long)pointer & ~mask; + } + if ( verbose && (((unsigned long)end_va) & 0xfffff) ) + { + printk("MemTest End: %p\n", end_va-1); + page_walk((unsigned long)end_va-1); + } + + /* verify values */ + for ( pointer = start_va; pointer < end_va; pointer++ ) + { + if ( ((unsigned long)pointer & ~mask) != *pointer ) + { + printk("Read error at 0x%lx. Read: 0x%lx, should read 0x%lx\n", + (unsigned long)pointer, *pointer, + ((unsigned long)pointer & ~mask)); + error_count++; + if ( error_count >= MEM_TEST_MAX_ERRORS ) + { + printk("mem_test: too many errors\n"); + return -1; + } + } + } + return 0; +} + + +/* + * get the PTE for virtual address va if it exists. Otherwise NULL. 
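+ * Performs a software walk of the live page tables, returning NULL as
+ * soon as any level lacks _PAGE_PRESENT; need_pgt() below is the
+ * allocating counterpart.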
+ */ +static pgentry_t *get_pgt(unsigned long va) +{ + unsigned long mfn; + pgentry_t *tab; + unsigned offset; + + tab = (pgentry_t *)start_info.pt_base; + mfn = virt_to_mfn(start_info.pt_base); + +#if defined(__x86_64__) + offset = l4_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); +#endif + offset = l3_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); + offset = l2_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + return NULL; + mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(mfn); + offset = l1_table_offset(va); + return &tab[offset]; +} + + +/* + * return a valid PTE for a given virtual address. If PTE does not exist, + * allocate page-table pages. + */ +pgentry_t *need_pgt(unsigned long va) +{ + unsigned long pt_mfn; + pgentry_t *tab; + unsigned long pt_pfn; + unsigned offset; + + tab = (pgentry_t *)start_info.pt_base; + pt_mfn = virt_to_mfn(start_info.pt_base); + +#if defined(__x86_64__) + offset = l4_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); +#endif + offset = l3_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); + offset = l2_table_offset(va); + if ( !(tab[offset] & _PAGE_PRESENT) ) + { + pt_pfn = virt_to_pfn(alloc_page()); + new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + pt_mfn = pte_to_mfn(tab[offset]); + tab = mfn_to_virt(pt_mfn); + + offset = l1_table_offset(va); + return &tab[offset]; +} + +/* + * Reserve an area of virtual address space for mappings and Heap + */ +static unsigned long demand_map_area_start; +#ifdef __x86_64__ +#define DEMAND_MAP_PAGES ((128ULL << 30) / PAGE_SIZE) +#else +#define DEMAND_MAP_PAGES ((2ULL << 30) / PAGE_SIZE) +#endif + +#ifndef HAVE_LIBC +#define HEAP_PAGES 0 +#else +unsigned long heap, brk, heap_mapped, heap_end; +#ifdef __x86_64__ +#define HEAP_PAGES ((128ULL << 30) / PAGE_SIZE) +#else +#define HEAP_PAGES ((1ULL << 30) / PAGE_SIZE) +#endif +#endif + +void arch_init_demand_mapping_area(unsigned long cur_pfn) +{ + cur_pfn++; + + demand_map_area_start = (unsigned long) pfn_to_virt(cur_pfn); + cur_pfn += DEMAND_MAP_PAGES; + printk("Demand map pfns at %lx-%p.\n", + demand_map_area_start, pfn_to_virt(cur_pfn)); + +#ifdef HAVE_LIBC + cur_pfn++; + heap_mapped = brk = heap = (unsigned long) pfn_to_virt(cur_pfn); + cur_pfn += HEAP_PAGES; + heap_end = (unsigned long) pfn_to_virt(cur_pfn); + printk("Heap resides at %lx-%lx.\n", brk, heap_end); +#endif +} + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment) +{ + unsigned long x; + unsigned long y = 0; + + /* Find a properly aligned run of n contiguous frames */ + for ( x = 0; + x <= DEMAND_MAP_PAGES - n; + x = (x + y + 1 + alignment - 1) & ~(alignment - 1) ) + { + unsigned long addr = demand_map_area_start + x * PAGE_SIZE; + pgentry_t *pgt = get_pgt(addr); + for ( y = 0; y < n; y++, addr += PAGE_SIZE ) + { + if ( !(addr & L1_MASK) ) + pgt = get_pgt(addr); + if ( pgt ) + { + if ( *pgt & _PAGE_PRESENT ) + break; + pgt++; + } + } + if ( y == n ) + 
break;
+    }
+    if ( y != n )
+    {
+        printk("Failed to find %ld frames!\n", n);
+        return 0;
+    }
+    return demand_map_area_start + x * PAGE_SIZE;
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space starting at
+ * va. map f[i*stride]+i*increment for i in 0..n-1.
+ */
+#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
+void do_map_frames(unsigned long va,
+                   const unsigned long *mfns, unsigned long n,
+                   unsigned long stride, unsigned long incr,
+                   domid_t id, int *err, unsigned long prot)
+{
+    pgentry_t *pgt = NULL;
+    unsigned long done = 0;
+    unsigned long i;
+    int rc;
+
+    if ( !mfns )
+    {
+        printk("do_map_frames: no mfns supplied\n");
+        return;
+    }
+    DEBUG("va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n",
+          va, n, mfns[0], stride, incr, prot);
+
+    if ( err )
+        memset(err, 0x00, n * sizeof(int));
+    while ( done < n )
+    {
+        unsigned long todo;
+
+        if ( err )
+            todo = 1;
+        else
+            todo = n - done;
+
+        if ( todo > MAP_BATCH )
+            todo = MAP_BATCH;
+
+        {
+            mmu_update_t mmu_updates[todo];
+
+            for ( i = 0; i < todo; i++, va += PAGE_SIZE, pgt++)
+            {
+                if ( !pgt || !(va & L1_MASK) )
+                    pgt = need_pgt(va);
+
+                mmu_updates[i].ptr = virt_to_mach(pgt) | MMU_NORMAL_PT_UPDATE;
+                mmu_updates[i].val = ((pgentry_t)(mfns[(done + i) * stride] +
+                                                  (done + i) * incr)
+                                      << PAGE_SHIFT) | prot;
+            }
+
+            rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id);
+            if ( rc < 0 )
+            {
+                if (err)
+                    err[done * stride] = rc;
+                else {
+                    printk("Map %ld (%lx, ...) at %lx failed: %d.\n",
+                           todo, mfns[done * stride] + done * incr, va, rc);
+                    do_exit();
+                }
+            }
+        }
+        done += todo;
+    }
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space. Virtual
+ * addresses are allocated from the on-demand area.
+ */
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+                    unsigned long stride, unsigned long incr,
+                    unsigned long alignment,
+                    domid_t id, int *err, unsigned long prot)
+{
+    unsigned long va = allocate_ondemand(n, alignment);
+
+    if ( !va )
+        return NULL;
+
+    do_map_frames(va, mfns, n, stride, incr, id, err, prot);
+
+    return (void *)va;
+}
+
+/*
+ * Unmap num_frames frames mapped at virtual address va.
+ */
+#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
+int unmap_frames(unsigned long va, unsigned long num_frames)
+{
+    int n = UNMAP_BATCH;
+    multicall_entry_t call[n];
+    int ret;
+    int i;
+
+    ASSERT(!((unsigned long)va & ~PAGE_MASK));
+
+    DEBUG("va=%p, num=0x%lx\n", va, num_frames);
+
+    while ( num_frames ) {
+        if ( n > num_frames )
+            n = num_frames;
+
+        for ( i = 0; i < n; i++ )
+        {
+            int arg = 0;
+            /* simply update the PTE for the VA and invalidate TLB */
+            call[i].op = __HYPERVISOR_update_va_mapping;
+            call[i].args[arg++] = va;
+            call[i].args[arg++] = 0;
+#ifdef __i386__
+            call[i].args[arg++] = 0;
+#endif
+            call[i].args[arg++] = UVMF_INVLPG;
+
+            va += PAGE_SIZE;
+        }
+
+        ret = HYPERVISOR_multicall(call, n);
+        if ( ret )
+        {
+            printk("update_va_mapping hypercall failed with rc=%d.\n", ret);
+            return -ret;
+        }
+
+        for ( i = 0; i < n; i++ )
+        {
+            if ( call[i].result )
+            {
+                printk("update_va_mapping failed with rc=%lu.\n",
+                       call[i].result);
+                return -(call[i].result);
+            }
+        }
+        num_frames -= n;
+    }
+    return 0;
+}
+
+/*
+ * Allocate pages which are contiguous in machine memory.
+ * Returns a VA to where they are mapped or 0 on failure.
+ *
+ * addr_bits indicates if the region has restrictions on where it is
+ * located. Typical values are 32 (if for example PCI devices can't access
+ * 64bit memory) or 0 for no restrictions.
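+ *
+ * (Illustrative use, an editorial addition; program_device() is a
+ * hypothetical consumer: a driver needing eight machine-contiguous pages
+ * below 4GiB could call
+ *
+ *     unsigned long va = alloc_contig_pages(3, 32);
+ *     if ( va )
+ *         program_device(virt_to_mach(va));
+ *
+ * where order 3 requests 2^3 pages and addr_bits 32 limits the resulting
+ * MFNs to 32-bit machine addresses.)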
+ *
+ * Allocated pages can be freed using the page allocator's free_pages()
+ * function.
+ *
+ * Based on the Linux function xen_create_contiguous_region().
+ */
+#define MAX_CONTIG_ORDER 9 /* 2MB */
+unsigned long alloc_contig_pages(int order, unsigned int addr_bits)
+{
+    unsigned long in_va, va;
+    unsigned long in_frames[1UL << order], out_frames, mfn;
+    multicall_entry_t call[1UL << order];
+    unsigned int i, num_pages = 1UL << order;
+    int ret, exch_success;
+
+    /* pass in num_pages extents of size 1 and
+     * request 1 extent of size 'order' */
+    struct xen_memory_exchange exchange = {
+        .in = {
+            .nr_extents = num_pages,
+            .extent_order = 0,
+            .domid = DOMID_SELF
+        },
+        .out = {
+            .nr_extents = 1,
+            .extent_order = order,
+            .address_bits = addr_bits,
+            .domid = DOMID_SELF
+        },
+        .nr_exchanged = 0
+    };
+
+    if ( order > MAX_CONTIG_ORDER )
+    {
+        printk("alloc_contig_pages: order too large 0x%x > 0x%x\n",
+               order, MAX_CONTIG_ORDER);
+        return 0;
+    }
+
+    /* Allocate some potentially discontiguous pages */
+    in_va = alloc_pages(order);
+    if ( !in_va )
+    {
+        printk("alloc_contig_pages: could not get enough pages (order=0x%x)\n",
+               order);
+        return 0;
+    }
+
+    /* set up arguments for the exchange hypercall */
+    set_xen_guest_handle(exchange.in.extent_start, in_frames);
+    set_xen_guest_handle(exchange.out.extent_start, &out_frames);
+
+    /* unmap current frames, keep a list of MFNs */
+    for ( i = 0; i < num_pages; i++ )
+    {
+        int arg = 0;
+
+        va = in_va + (PAGE_SIZE * i);
+        in_frames[i] = virt_to_mfn(va);
+
+        /* update P2M mapping */
+        phys_to_machine_mapping[virt_to_pfn(va)] = INVALID_P2M_ENTRY;
+
+        /* build multi call */
+        call[i].op = __HYPERVISOR_update_va_mapping;
+        call[i].args[arg++] = va;
+        call[i].args[arg++] = 0;
+#ifdef __i386__
+        call[i].args[arg++] = 0;
+#endif
+        call[i].args[arg++] = UVMF_INVLPG;
+    }
+
+    ret = HYPERVISOR_multicall(call, i);
+    if ( ret )
+    {
+        printk("Odd, update_va_mapping hypercall failed with rc=%d.\n", ret);
+        return 0;
+    }
+
+    /* try getting a contiguous range of MFNs */
+    out_frames = virt_to_pfn(in_va); /* PFNs to populate */
+    ret = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+    if ( ret ) {
+        printk("memory exchange (order=0x%x) failed with rc=%d, nr_exchanged=%lu\n",
+               order, ret, exchange.nr_exchanged);
+        /* We still need to return the pages allocated above to the pool,
+         * i.e. map them back into the 1:1 mapping etc., so we continue,
+         * but in the end we hand the pages back to the page allocator and
+         * return 0. */
+        exch_success = 0;
+    }
+    else
+        exch_success = 1;
+
+    /* map frames into 1:1 and update p2m */
+    for ( i = 0; i < num_pages; i++ )
+    {
+        int arg = 0;
+        pte_t pte;
+
+        va = in_va + (PAGE_SIZE * i);
+        mfn = i < exchange.nr_exchanged ? (out_frames + i) : in_frames[i];
+        pte = __pte(mfn << PAGE_SHIFT | L1_PROT);
+
+        /* update P2M mapping */
+        phys_to_machine_mapping[virt_to_pfn(va)] = mfn;
+
+        /* build multi call */
+        call[i].op = __HYPERVISOR_update_va_mapping;
+        call[i].args[arg++] = va;
+#ifdef __x86_64__
+        call[i].args[arg++] = (pgentry_t)pte.pte;
+#else
+        call[i].args[arg++] = pte.pte_low;
+        call[i].args[arg++] = pte.pte_high;
+#endif
+        call[i].args[arg++] = UVMF_INVLPG;
+    }
+    ret = HYPERVISOR_multicall(call, i);
+    if ( ret )
+    {
+        printk("update_va_mapping hypercall no. 2 failed with rc=%d.\n", ret);
+        return 0;
+    }
+
+    if ( !exch_success )
+    {
+        /* since the exchange failed we just free the pages as well */
+        free_pages((void *) in_va, order);
+        return 0;
+    }
+
+    return in_va;
+}
+
+/*
+ * Clear some of the bootstrap memory
+ */
+static void clear_bootstrap(void)
+{
+    pte_t nullpte = { };
+    int rc;
+
+    /* Use first page as the CoW zero page */
+    memset(&_text, 0, PAGE_SIZE);
+    mfn_zero = virt_to_mfn((unsigned long) &_text);
+    if ( (rc = HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG)) )
+        printk("Unable to unmap NULL page. rc=%d\n", rc);
+}
+
+void arch_init_p2m(unsigned long max_pfn)
+{
+#ifdef __x86_64__
+#define L1_P2M_SHIFT 9
+#define L2_P2M_SHIFT 18
+#define L3_P2M_SHIFT 27
+#else
+#define L1_P2M_SHIFT 10
+#define L2_P2M_SHIFT 20
+#define L3_P2M_SHIFT 30
+#endif
+#define L1_P2M_ENTRIES (1 << L1_P2M_SHIFT)
+#define L2_P2M_ENTRIES (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))
+#define L3_P2M_ENTRIES (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))
+#define L1_P2M_MASK (L1_P2M_ENTRIES - 1)
+#define L2_P2M_MASK (L2_P2M_ENTRIES - 1)
+#define L3_P2M_MASK (L3_P2M_ENTRIES - 1)
+
+    unsigned long *l1_list = NULL, *l2_list = NULL, *l3_list;
+    unsigned long pfn;
+
+    l3_list = (unsigned long *)alloc_page();
+    for ( pfn = 0; pfn < max_pfn; pfn++ )
+    {
+        if ( !(pfn % (L1_P2M_ENTRIES * L2_P2M_ENTRIES)) )
+        {
+            l2_list = (unsigned long*)alloc_page();
+            if ( (pfn >> L3_P2M_SHIFT) > 0 )
+            {
+                printk("Error: Too many pfns.\n");
+                do_exit();
+            }
+            l3_list[(pfn >> L2_P2M_SHIFT)] = virt_to_mfn(l2_list);
+        }
+        if ( !(pfn % (L1_P2M_ENTRIES)) )
+        {
+            l1_list = (unsigned long*)alloc_page();
+            l2_list[(pfn >> L1_P2M_SHIFT) & L2_P2M_MASK] =
+                virt_to_mfn(l1_list);
+        }
+
+        l1_list[pfn & L1_P2M_MASK] = pfn_to_mfn(pfn);
+    }
+    HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+        virt_to_mfn(l3_list);
+    HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+}
+
+void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
+{
+    unsigned long start_pfn, max_pfn;
+
+    printk("      _text: %p(VA)\n", &_text);
+    printk("     _etext: %p(VA)\n", &_etext);
+    printk("   _erodata: %p(VA)\n", &_erodata);
+    printk("     _edata: %p(VA)\n", &_edata);
+    printk("stack start: %p(VA)\n", stack);
+    printk("       _end: %p(VA)\n", &_end);
+
+    /* First page follows page table pages and 3 more pages (store page etc) */
+    start_pfn = PFN_UP(to_phys(start_info.pt_base)) +
+        start_info.nr_pt_frames + 3;
+    max_pfn = start_info.nr_pages;
+
+    /* We need room for demand mapping and heap, clip available memory */
+#if defined(__i386__)
+    {
+        unsigned long virt_pfns = 1 + DEMAND_MAP_PAGES + 1 + HEAP_PAGES;
+        if (max_pfn + virt_pfns >= 0x100000)
+            max_pfn = 0x100000 - virt_pfns - 1;
+    }
+#endif
+
+    printk("  start_pfn: %lx\n", start_pfn);
+    printk("    max_pfn: %lx\n", max_pfn);
+
+    build_pagetable(&start_pfn, &max_pfn);
+    clear_bootstrap();
+    set_readonly(&_text, &_erodata);
+
+    *start_pfn_p = start_pfn;
+    *max_pfn_p = max_pfn;
+}
+
+grant_entry_t *arch_init_gnttab(int nr_grant_frames)
+{
+    struct gnttab_setup_table setup;
+    unsigned long frames[nr_grant_frames];
+
+    setup.dom = DOMID_SELF;
+    setup.nr_frames = nr_grant_frames;
+    set_xen_guest_handle(setup.frame_list, frames);
+
+    HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+    return map_frames(frames, nr_grant_frames);
+}
diff -Nru xen-4.6.0/extras/mini-os/arch/x86/sched.c xen-4.6.5/extras/mini-os/arch/x86/sched.c
--- xen-4.6.0/extras/mini-os/arch/x86/sched.c	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/arch/x86/sched.c	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,139 @@
+/*
+ ****************************************************************************
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: sched.c
+ *      Author: Grzegorz Milos
+ *     Changes: Robert Kaiser
+ *
+ *        Date: Aug 2005
+ *
+ * Environment: Xen Minimal OS
+ * Description: simple scheduler for Mini-OS
+ *
+ * The scheduler is non-preemptive (cooperative), and schedules according
+ * to a round-robin algorithm.
+ *
+ ****************************************************************************
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#ifdef SCHED_DEBUG
+#define DEBUG(_f, _a...) \
+    printk("MINI_OS(file=sched.c, line=%d) " _f "\n", __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...)    ((void)0)
+#endif
+
+
+void dump_stack(struct thread *thread)
+{
+    unsigned long *bottom = (unsigned long *)(thread->stack + STACK_SIZE);
+    unsigned long *pointer = (unsigned long *)thread->sp;
+    int count;
+    if(thread == current)
+    {
+#ifdef __i386__
+        asm("movl %%esp,%0"
+            : "=r"(pointer));
+#else
+        asm("movq %%rsp,%0"
+            : "=r"(pointer));
+#endif
+    }
+    printk("The stack for \"%s\"\n", thread->name);
+    for(count = 0; count < 25 && pointer < bottom; count++)
+    {
+        printk("[0x%p] 0x%lx\n", pointer, *pointer);
+        pointer++;
+    }
+
+    if(pointer < bottom) printk(" ... continues.\n");
+}
+
+/* Gets run when a new thread is scheduled the first time ever,
+   defined in x86_[32/64].S */
+extern void thread_starter(void);
+
+/* Pushes the specified value onto the stack of the specified thread */
+static void stack_push(struct thread *thread, unsigned long value)
+{
+    thread->sp -= sizeof(unsigned long);
+    *((unsigned long *)thread->sp) = value;
+}
+
+/* Architecture specific setup of thread creation */
+struct thread* arch_create_thread(char *name, void (*function)(void *),
+                                  void *data)
+{
+    struct thread *thread;
+
+    thread = xmalloc(struct thread);
+    /* We can't use lazy allocation here since the trap handler runs on the stack */
+    thread->stack = (char *)alloc_pages(STACK_SIZE_PAGE_ORDER);
+    thread->name = name;
+    printk("Thread \"%s\": pointer: 0x%p, stack: 0x%p\n", name, thread,
+           thread->stack);
+
+    thread->sp = (unsigned long)thread->stack + STACK_SIZE;
+    /* Save pointer to the thread on the stack, used by current macro */
+    *((unsigned long *)thread->stack) = (unsigned long)thread;
+
+    /* Must ensure that (%rsp + 8) is 16-byte aligned at the start of
+     * thread_starter.
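+     *
+     * (Editorial illustration of the layout this sets up, from the top of
+     * the stack area downwards, as consumed by thread_starter:
+     *
+     *     thread->stack + STACK_SIZE:
+     *         alignment slot   (the sizeof(unsigned long) subtracted below)
+     *         function         (popped and called by thread_starter)
+     *     sp: data             (popped by thread_starter as the argument)
+     *
+     * while the lowest word of the stack area holds the struct thread
+     * pointer for the current macro.)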
*/ + thread->sp -= sizeof(unsigned long); + + stack_push(thread, (unsigned long) function); + stack_push(thread, (unsigned long) data); + thread->ip = (unsigned long) thread_starter; + return thread; +} + +void run_idle_thread(void) +{ + /* Switch stacks and run the thread */ +#if defined(__i386__) + __asm__ __volatile__("mov %0,%%esp\n\t" + "push %1\n\t" + "ret" + :"=m" (idle_thread->sp) + :"m" (idle_thread->ip)); +#elif defined(__x86_64__) + __asm__ __volatile__("mov %0,%%rsp\n\t" + "push %1\n\t" + "ret" + :"=m" (idle_thread->sp) + :"m" (idle_thread->ip)); +#endif +} + + + diff -Nru xen-4.6.0/extras/mini-os/arch/x86/setup.c xen-4.6.5/extras/mini-os/arch/x86/setup.c --- xen-4.6.0/extras/mini-os/arch/x86/setup.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/setup.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,168 @@ +/****************************************************************************** + * common.c + * + * Common stuff special to x86 goes here. + * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include /* for printk, memcpy */ +#include +#include + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +shared_info_t *HYPERVISOR_shared_info; + +/* + * This structure contains start-of-day info, such as pagetable base pointer, + * address of the shared_info structure, and things like that. + */ +union start_info_union start_info_union; + +/* + * Just allocate the kernel stack here. SS:ESP is set up to point here + * in head.S. + */ +char stack[2*STACK_SIZE]; + +extern char shared_info[PAGE_SIZE]; + +/* Assembler interface fns in entry.S. */ +void hypervisor_callback(void); +void failsafe_callback(void); + +#if defined(__x86_64__) +#define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif + +static +shared_info_t *map_shared_info(unsigned long pa) +{ + int rc; + + if ( (rc = HYPERVISOR_update_va_mapping( + (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG)) ) + { + printk("Failed to map shared_info!! 
rc=%d\n", rc); + do_exit(); + } + return (shared_info_t *)shared_info; +} + +static inline void fpu_init(void) { + asm volatile("fninit"); +} + +#ifdef __SSE__ +static inline void sse_init(void) { + unsigned long status = 0x1f80; + asm volatile("ldmxcsr %0" : : "m" (status)); +} +#else +#define sse_init() +#endif + + +/* + * INITIAL C ENTRY POINT. + */ +void +arch_init(start_info_t *si) +{ + static char hello[] = "Bootstrapping...\n"; + + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello); + + trap_init(); + + /*Initialize floating point unit */ + fpu_init(); + + /* Initialize SSE */ + sse_init(); + + /* Copy the start_info struct to a globally-accessible area. */ + /* WARN: don't do printk before here, it uses information from + shared_info. Use xprintk instead. */ + memcpy(&start_info, si, sizeof(*si)); + + /* print out some useful information */ + printk("Xen Minimal OS!\n"); + printk(" start_info: %p(VA)\n", si); + printk(" nr_pages: 0x%lx\n", si->nr_pages); + printk(" shared_inf: 0x%08lx(MA)\n", si->shared_info); + printk(" pt_base: %p(VA)\n", (void *)si->pt_base); + printk("nr_pt_frames: 0x%lx\n", si->nr_pt_frames); + printk(" mfn_list: %p(VA)\n", (void *)si->mfn_list); + printk(" mod_start: 0x%lx(VA)\n", si->mod_start); + printk(" mod_len: %lu\n", si->mod_len); + printk(" flags: 0x%x\n", (unsigned int)si->flags); + printk(" cmd_line: %s\n", + si->cmd_line ? (const char *)si->cmd_line : "NULL"); + printk(" stack: %p-%p\n", stack, stack + sizeof(stack)); + + /* set up minimal memory infos */ + phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; + + /* Grab the shared_info pointer and put it in a safe place. */ + HYPERVISOR_shared_info = map_shared_info(start_info.shared_info); + + /* Set up event and failsafe callback addresses. 
+     */
+#ifdef __i386__
+    HYPERVISOR_set_callbacks(
+        __KERNEL_CS, (unsigned long)hypervisor_callback,
+        __KERNEL_CS, (unsigned long)failsafe_callback);
+#else
+    HYPERVISOR_set_callbacks(
+        (unsigned long)hypervisor_callback,
+        (unsigned long)failsafe_callback, 0);
+#endif
+
+    start_kernel();
+}
+
+void
+arch_fini(void)
+{
+    /* Reset traps */
+    trap_fini();
+
+#ifdef __i386__
+    HYPERVISOR_set_callbacks(0, 0, 0, 0);
+#else
+    HYPERVISOR_set_callbacks(0, 0, 0);
+#endif
+}
+
+void
+arch_do_exit(void)
+{
+    stack_walk();
+}
diff -Nru xen-4.6.0/extras/mini-os/arch/x86/time.c xen-4.6.5/extras/mini-os/arch/x86/time.c
--- xen-4.6.0/extras/mini-os/arch/x86/time.c	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/arch/x86/time.c	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,243 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Keir Fraser - University of Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ * (C) 2006 - Robert Kaiser - FH Wiesbaden
+ ****************************************************************************
+ *
+ *        File: time.c
+ *      Author: Rolf Neugebauer and Keir Fraser
+ *     Changes: Grzegorz Milos
+ *
+ * Description: Simple time and timer functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/************************************************************************
+ * Time functions
+ *************************************************************************/
+
+/* These are periodically updated in shared_info, and then copied here. */
+struct shadow_time_info {
+    uint64_t tsc_timestamp;     /* TSC at last update of time vals.  */
+    uint64_t system_timestamp;  /* Time, in nanosecs, since boot.
*/ + uint32_t tsc_to_nsec_mul; + uint32_t tsc_to_usec_mul; + int tsc_shift; + uint32_t version; +}; +static struct timespec shadow_ts; +static uint32_t shadow_ts_version; + +static struct shadow_time_info shadow; + + +#ifndef rmb +#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#endif + +#define HANDLE_USEC_OVERFLOW(_tv) \ + do { \ + while ( (_tv)->tv_usec >= 1000000 ) \ + { \ + (_tv)->tv_usec -= 1000000; \ + (_tv)->tv_sec++; \ + } \ + } while ( 0 ) + +static inline int time_values_up_to_date(void) +{ + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_info[0].time; + + return (shadow.version == src->version); +} + +static inline int wc_values_up_to_date(void) +{ + shared_info_t *s= HYPERVISOR_shared_info; + + return (shadow_ts_version == s->wc_version); +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; +#ifdef __i386__ + uint32_t tmp1, tmp2; +#endif + + if ( shift < 0 ) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "add %4,%%eax ; " + "xor %5,%5 ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); +#else + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((uint64_t)mul_frac) ); +#endif + + return product; +} + + +static unsigned long get_nsec_offset(void) +{ + uint64_t now, delta; + rdtscll(now); + delta = now - shadow.tsc_timestamp; + return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift); +} + + +static void get_time_values_from_xen(void) +{ + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_info[0].time; + + do { + shadow.version = src->version; + rmb(); + shadow.tsc_timestamp = src->tsc_timestamp; + shadow.system_timestamp = src->system_time; + shadow.tsc_to_nsec_mul = src->tsc_to_system_mul; + shadow.tsc_shift = src->tsc_shift; + rmb(); + } + while ((src->version & 1) | (shadow.version ^ src->version)); + + shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000; +} + + + + +/* monotonic_clock(): returns # of nanoseconds passed since time_init() + * Note: This function is required to return accurate + * time even in the absence of multiple timer ticks. 
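+ *
+ * (Editorial sketch: the value is reconstructed as
+ *
+ *     shadow.system_timestamp + get_nsec_offset()
+ *
+ * where get_nsec_offset() scales the TSC delta via scale_delta(); the loop
+ * below retries until shadow.version is stable, so a concurrent update of
+ * the shadow copy can never produce a torn timestamp.)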
+ */ +uint64_t monotonic_clock(void) +{ + uint64_t time; + uint32_t local_time_version; + + do { + local_time_version = shadow.version; + rmb(); + time = shadow.system_timestamp + get_nsec_offset(); + if (!time_values_up_to_date()) + get_time_values_from_xen(); + rmb(); + } while (local_time_version != shadow.version); + + return time; +} + +static void update_wallclock(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + do { + shadow_ts_version = s->wc_version; + rmb(); + shadow_ts.tv_sec = s->wc_sec; + shadow_ts.tv_nsec = s->wc_nsec; + rmb(); + } + while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version)); +} + + +int gettimeofday(struct timeval *tv, void *tz) +{ + uint64_t nsec = monotonic_clock(); + + if (!wc_values_up_to_date()) + update_wallclock(); + + nsec += shadow_ts.tv_nsec; + + tv->tv_sec = shadow_ts.tv_sec; + tv->tv_sec += NSEC_TO_SEC(nsec); + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); + + return 0; +} + + +void block_domain(s_time_t until) +{ + ASSERT(irqs_disabled()); + if(monotonic_clock() < until) + { + HYPERVISOR_set_timer_op(until); + HYPERVISOR_sched_op(SCHEDOP_block, 0); + local_irq_disable(); + } +} + + +/* + * Just a dummy + */ +static void timer_handler(evtchn_port_t ev, struct pt_regs *regs, void *ign) +{ +} + + + +static evtchn_port_t port; +void init_time(void) +{ + printk("Initialising timer interface\n"); + port = bind_virq(VIRQ_TIMER, &timer_handler, NULL); + unmask_evtchn(port); +} + +void fini_time(void) +{ + /* Clear any pending timer */ + HYPERVISOR_set_timer_op(0); + unbind_evtchn(port); +} diff -Nru xen-4.6.0/extras/mini-os/arch/x86/traps.c xen-4.6.5/extras/mini-os/arch/x86/traps.c --- xen-4.6.0/extras/mini-os/arch/x86/traps.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/arch/x86/traps.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,333 @@ + +#include +#include +#include +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. 
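+ *
+ * (Editorial note: each stub pushes an error code where the CPU does not
+ * supply one and funnels into a common register-saving path, so every
+ * vector ends up in its matching C handler, e.g.
+ *
+ *     void do_general_protection(struct pt_regs *regs, long error_code);
+ *
+ * declared below and registered with Xen via trap_table[] at the end of
+ * this file.)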
+ */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + + +void dump_regs(struct pt_regs *regs) +{ + printk("Thread: %s\n", current->name); +#ifdef __i386__ + printk("EIP: %lx, EFLAGS %lx.\n", regs->eip, regs->eflags); + printk("EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx EAX: %08lx\n", + regs->esi, regs->edi, regs->ebp, regs->eax); + printk("DS: %04x ES: %04x orig_eax: %08lx, eip: %08lx\n", + regs->xds, regs->xes, regs->orig_eax, regs->eip); + printk("CS: %04x EFLAGS: %08lx esp: %08lx ss: %04x\n", + regs->xcs, regs->eflags, regs->esp, regs->xss); +#else + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", + regs->ss, regs->rsp, regs->eflags); + printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP: %016lx R08: %016lx R09: %016lx\n", + regs->rbp, regs->r8, regs->r9); + printk("R10: %016lx R11: %016lx R12: %016lx\n", + regs->r10, regs->r11, regs->r12); + printk("R13: %016lx R14: %016lx R15: %016lx\n", + regs->r13, regs->r14, regs->r15); +#endif +} + +static void do_trap(int trapnr, char *str, struct pt_regs * regs, unsigned long error_code) +{ + printk("FATAL: Unhandled Trap %d (%s), error code=0x%lx\n", trapnr, str, error_code); + printk("Regs address %p\n", regs); + dump_regs(regs); + do_exit(); +} + +#define DO_ERROR(trapnr, str, name) \ +void do_##name(struct pt_regs * regs, unsigned long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \ +void do_##name(struct pt_regs * regs, unsigned long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, "int3", int3) +DO_ERROR( 4, "overflow", overflow) +DO_ERROR( 5, "bounds", bounds) +DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, "device not available", device_not_available) +DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, "invalid TSS", invalid_TSS) +DO_ERROR(11, "segment not present", segment_not_present) +DO_ERROR(12, "stack segment", stack_segment) +DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, "machine check", machine_check) + +void page_walk(unsigned long virt_address) +{ + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; + unsigned long addr = virt_address; + printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, start_info.pt_base); + +#if defined(__x86_64__) + page = tab[l4_table_offset(addr)]; + tab = pte_to_virt(page); + printk(" L4 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l4_table_offset(addr)); +#endif + page = tab[l3_table_offset(addr)]; + tab = pte_to_virt(page); + printk(" L3 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l3_table_offset(addr)); + page = tab[l2_table_offset(addr)]; + tab = pte_to_virt(page); + 
printk(" L2 = %"PRIpte" (%p) [offset = %lx]\n", page, tab, l2_table_offset(addr)); + + page = tab[l1_table_offset(addr)]; + printk(" L1 = %"PRIpte" [offset = %lx]\n", page, l1_table_offset(addr)); + +} + +static int handle_cow(unsigned long addr) { + pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; + unsigned long new_page; + int rc; + +#if defined(__x86_64__) + page = tab[l4_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + tab = pte_to_virt(page); +#endif + page = tab[l3_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + tab = pte_to_virt(page); + + page = tab[l2_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + tab = pte_to_virt(page); + + page = tab[l1_table_offset(addr)]; + if (!(page & _PAGE_PRESENT)) + return 0; + /* Only support CoW for the zero page. */ + if (PHYS_PFN(page) != mfn_zero) + return 0; + + new_page = alloc_pages(0); + memset((void*) new_page, 0, PAGE_SIZE); + + rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG); + if (!rc) + return 1; + + printk("Map zero page to %lx failed: %d.\n", addr, rc); + return 0; +} + +static void do_stack_walk(unsigned long frame_base) +{ + unsigned long *frame = (void*) frame_base; + printk("base is %#lx ", frame_base); + printk("caller is %#lx\n", frame[1]); + if (frame[0]) + do_stack_walk(frame[0]); +} + +void stack_walk(void) +{ + unsigned long bp; +#ifdef __x86_64__ + asm("movq %%rbp, %0":"=r"(bp)); +#else + asm("movl %%ebp, %0":"=r"(bp)); +#endif + do_stack_walk(bp); +} + +static void dump_mem(unsigned long addr) +{ + unsigned long i; + if (addr < PAGE_SIZE) + return; + + for (i = ((addr)-16 ) & ~15; i < (((addr)+48 ) & ~15); i++) + { + if (!(i%16)) + printk("\n%lx:", i); + printk(" %02x", *(unsigned char *)i); + } + printk("\n"); +} +#define read_cr2() \ + (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2) + +static int handling_pg_fault = 0; + +void do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + unsigned long addr = read_cr2(); + struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_crash }; + + if ((error_code & TRAP_PF_WRITE) && handle_cow(addr)) + return; + + /* If we are already handling a page fault, and got another one + that means we faulted in pagetable walk. Continuing here would cause + a recursive fault */ + if(handling_pg_fault == 1) + { + printk("Page fault in pagetable walk (access to invalid memory?).\n"); + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + } + handling_pg_fault++; + barrier(); + +#if defined(__x86_64__) + printk("Page fault at linear address %lx, rip %lx, regs %p, sp %lx, our_sp %p, code %lx\n", + addr, regs->rip, regs, regs->rsp, &addr, error_code); +#else + printk("Page fault at linear address %lx, eip %lx, regs %p, sp %lx, our_sp %p, code %lx\n", + addr, regs->eip, regs, regs->esp, &addr, error_code); +#endif + + dump_regs(regs); +#if defined(__x86_64__) + do_stack_walk(regs->rbp); + dump_mem(regs->rsp); + dump_mem(regs->rbp); + dump_mem(regs->rip); +#else + do_stack_walk(regs->ebp); + dump_mem(regs->esp); + dump_mem(regs->ebp); + dump_mem(regs->eip); +#endif + page_walk(addr); + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + /* We should never get here ... 
but still */
+    handling_pg_fault--;
+}
+
+void do_general_protection(struct pt_regs *regs, long error_code)
+{
+    struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_crash };
+#ifdef __i386__
+    printk("GPF eip: %lx, error_code=%lx\n", regs->eip, error_code);
+#else
+    printk("GPF rip: %lx, error_code=%lx\n", regs->rip, error_code);
+#endif
+    dump_regs(regs);
+#if defined(__x86_64__)
+    do_stack_walk(regs->rbp);
+    dump_mem(regs->rsp);
+    dump_mem(regs->rbp);
+    dump_mem(regs->rip);
+#else
+    do_stack_walk(regs->ebp);
+    dump_mem(regs->esp);
+    dump_mem(regs->ebp);
+    dump_mem(regs->eip);
+#endif
+    HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+}
+
+
+void do_debug(struct pt_regs * regs)
+{
+    printk("Debug exception\n");
+#define TF_MASK 0x100
+    regs->eflags &= ~TF_MASK;
+    dump_regs(regs);
+    do_exit();
+}
+
+void do_coprocessor_error(struct pt_regs * regs)
+{
+    printk("Copro error\n");
+    dump_regs(regs);
+    do_exit();
+}
+
+void simd_math_error(void *eip)
+{
+    printk("SIMD error\n");
+}
+
+void do_simd_coprocessor_error(struct pt_regs * regs)
+{
+    printk("SIMD copro error\n");
+}
+
+void do_spurious_interrupt_bug(struct pt_regs * regs)
+{
+}
+
+/*
+ * Submit a virtual IDT to the hypervisor. This consists of tuples
+ * (interrupt vector, privilege ring, CS:EIP of handler).
+ * The 'privilege ring' field specifies the least-privileged ring that
+ * can trap to that vector using a software-interrupt instruction (INT).
+ */
+static trap_info_t trap_table[] = {
+    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
+    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug      },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+    {  0, 0,           0, 0                                          }
+};
+
+
+
+void trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);
+}
+
+void trap_fini(void)
+{
+    HYPERVISOR_set_trap_table(NULL);
+}
diff -Nru xen-4.6.0/extras/mini-os/arch/x86/x86_32.S xen-4.6.5/extras/mini-os/arch/x86/x86_32.S
--- xen-4.6.0/extras/mini-os/arch/x86/x86_32.S	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/arch/x86/x86_32.S	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,305 @@
+#include
+#include
+#include
+
+.section __xen_guest
+	.ascii	"GUEST_OS=Mini-OS"
+	.ascii	",XEN_VER=xen-3.0"
+	.ascii	",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */
+	.ascii	",ELF_PADDR_OFFSET=0x0"
+	.ascii	",HYPERCALL_PAGE=0x2"
+	.ascii	",PAE=yes[extended-cr3]"
+	.ascii	",LOADER=generic"
+	.byte	0
+.text
+
+.globl _start, shared_info, hypercall_page
+
+_start:
+	cld
+	lss stack_start,%esp
+	andl $(~(__STACK_SIZE-1)), %esp
+	push %esi
+	call arch_init
+
+stack_start:
+	.long stack+(2*__STACK_SIZE), __KERNEL_SS
+
+	/* Unpleasant -- the PTE that maps this page is actually overwritten */
+	/* to map the real shared-info page! :-) */
+	.org 0x1000
+shared_info:
+	.org 0x2000
+
+hypercall_page:
+	.org 0x3000
+
+ES		= 0x20
+ORIG_EAX	= 0x24
+EIP		= 0x28
+CS		= 0x2C
+
+#define ENTRY(X) .globl X ; X :
+
+#define SAVE_ALL \
+	cld; \
+	pushl %es; \
+	pushl %ds; \
+	pushl %eax; \
+	pushl %ebp; \
+	pushl %edi; \
+	pushl %esi; \
+	pushl %edx; \
+	pushl %ecx; \
+	pushl %ebx; \
+	movl $(__KERNEL_DS),%edx; \
+	movl %edx,%ds; \
+	movl %edx,%es;
+
+#define RESTORE_ALL \
+	popl %ebx; \
+	popl %ecx; \
+	popl %edx; \
+	popl %esi; \
+	popl %edi; \
+	popl %ebp; \
+	popl %eax; \
+	popl %ds; \
+	popl %es; \
+	addl $4,%esp; \
+	iret;
+
+ENTRY(divide_error)
+	pushl $0		# no error code
+	pushl $do_divide_error
+do_exception:
+	pushl %ds
+	pushl %eax
+	xorl %eax, %eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	decl %eax		# eax = -1
+	pushl %ecx
+	pushl %ebx
+	cld
+	movl %es, %ecx
+	movl ES(%esp), %edi	# get the function address
+	movl ORIG_EAX(%esp), %edx	# get the error code
+	movl %eax, ORIG_EAX(%esp)
+	movl %ecx, ES(%esp)
+	movl $(__KERNEL_DS), %ecx
+	movl %ecx, %ds
+	movl %ecx, %es
+	movl %esp,%eax		# pt_regs pointer
+	pushl %edx
+	pushl %eax
+	call *%edi
+	jmp ret_from_exception
+
+ret_from_exception:
+	movb CS(%esp),%cl
+	addl $8,%esp
+	RESTORE_ALL
+
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+	pushl %eax
+	SAVE_ALL
+	movl EIP(%esp),%eax
+	cmpl $scrit,%eax
+	jb   11f
+	cmpl $ecrit,%eax
+	jb   critical_region_fixup
+11:	push %esp
+	xorl %ebp,%ebp
+	call do_hypervisor_callback
+	add  $4,%esp
+	movl HYPERVISOR_shared_info,%esi
+	xorl %eax,%eax
+	movb CS(%esp),%cl
+	test $2,%cl		# slow return to ring 2 or 3
+	jne  safesti
+safesti:movb $0,1(%esi)		# reenable event callbacks
+scrit:	/**** START OF CRITICAL REGION ****/
+	testb $0xFF,(%esi)
+	jnz  14f		# process more events if necessary...
+	RESTORE_ALL
+14:	movb $1,1(%esi)
+	jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
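+# (Editorial example, using the table below: if the interrupt arrived at the
+# "pop %esi" of RESTORE_ALL, then %ebx, %ecx and %edx -- 0x0c bytes -- are
+# already gone from the interrupted frame, so the lookup yields 0x0c and the
+# copy loop merges that many fewer bytes.)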
+critical_region_fixup:
+	addl $critical_fixup_table-scrit,%eax
+	movzbl (%eax),%eax	# %eax contains num bytes popped
+	mov  %esp,%esi
+	add  %eax,%esi		# %esi points at end of src region
+	mov  %esp,%edi
+	add  $0x34,%edi		# %edi points at end of dst region
+	mov  %eax,%ecx
+	shr  $2,%ecx		# convert bytes to words
+	je   16f		# skip loop if nothing to copy
+15:	subl $4,%esi		# pre-decrementing copy loop
+	subl $4,%edi
+	movl (%esi),%eax
+	movl %eax,(%edi)
+	loop 15b
+16:	movl %edi,%esp		# final %edi is top of merged stack
+	jmp  11b
+
+critical_fixup_table:
+	.byte 0x00,0x00,0x00		# testb $0xff,(%esi)
+	.byte 0x00,0x00			# jne  14f
+	.byte 0x00			# pop  %ebx
+	.byte 0x04			# pop  %ecx
+	.byte 0x08			# pop  %edx
+	.byte 0x0c			# pop  %esi
+	.byte 0x10			# pop  %edi
+	.byte 0x14			# pop  %ebp
+	.byte 0x18			# pop  %eax
+	.byte 0x1c			# pop  %ds
+	.byte 0x20			# pop  %es
+	.byte 0x24,0x24,0x24		# add  $4,%esp
+	.byte 0x28			# iret
+	.byte 0x00,0x00,0x00,0x00	# movb $1,1(%esi)
+	.byte 0x00,0x00			# jmp  11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+	pop  %ds
+	pop  %es
+	pop  %fs
+	pop  %gs
+	iret
+
+ENTRY(coprocessor_error)
+	pushl $0
+	pushl $do_coprocessor_error
+	jmp do_exception
+
+ENTRY(simd_coprocessor_error)
+	pushl $0
+	pushl $do_simd_coprocessor_error
+	jmp do_exception
+
+ENTRY(device_not_available)
+	iret
+
+ENTRY(debug)
+	pushl $0
+	pushl $do_debug
+	jmp do_exception
+
+ENTRY(int3)
+	pushl $0
+	pushl $do_int3
+	jmp do_exception
+
+ENTRY(overflow)
+	pushl $0
+	pushl $do_overflow
+	jmp do_exception
+
+ENTRY(bounds)
+	pushl $0
+	pushl $do_bounds
+	jmp do_exception
+
+ENTRY(invalid_op)
+	pushl $0
+	pushl $do_invalid_op
+	jmp do_exception
+
+
+ENTRY(coprocessor_segment_overrun)
+	pushl $0
+	pushl $do_coprocessor_segment_overrun
+	jmp do_exception
+
+
+ENTRY(invalid_TSS)
+	pushl $do_invalid_TSS
+	jmp do_exception
+
+
+ENTRY(segment_not_present)
+	pushl $do_segment_not_present
+	jmp do_exception
+
+
+ENTRY(stack_segment)
+	pushl $do_stack_segment
+	jmp do_exception
+
+
+ENTRY(general_protection)
+	pushl $do_general_protection
+	jmp do_exception
+
+
+ENTRY(alignment_check)
+	pushl $do_alignment_check
+	jmp do_exception
+
+
+ENTRY(page_fault)
+	pushl $do_page_fault
+	jmp do_exception
+
+ENTRY(machine_check)
+	pushl $0
+	pushl $do_machine_check
+	jmp do_exception
+
+
+ENTRY(spurious_interrupt_bug)
+	pushl $0
+	pushl $do_spurious_interrupt_bug
+	jmp do_exception
+
+
+
+ENTRY(thread_starter)
+	popl %eax
+	popl %ebx
+	pushl $0
+	xorl %ebp,%ebp
+	pushl %eax
+	call *%ebx
+	call exit_thread
+
+ENTRY(__arch_switch_threads)
+	movl 4(%esp), %ecx		/* prev */
+	movl 8(%esp), %edx		/* next */
+	pushl %ebp
+	pushl %ebx
+	pushl %esi
+	pushl %edi
+	movl %esp, (%ecx)		/* save ESP */
+	movl (%edx), %esp		/* restore ESP */
+	movl $1f, 4(%ecx)		/* save EIP */
+	pushl 4(%edx)			/* restore EIP */
+	ret
+1:
+	popl %edi
+	popl %esi
+	popl %ebx
+	popl %ebp
+	ret
diff -Nru xen-4.6.0/extras/mini-os/arch/x86/x86_64.S xen-4.6.5/extras/mini-os/arch/x86/x86_64.S
--- xen-4.6.0/extras/mini-os/arch/x86/x86_64.S	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/arch/x86/x86_64.S	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,383 @@
+#include
+#include
+#include
+
+.section __xen_guest
+	.ascii	"GUEST_OS=Mini-OS"
+	.ascii	",XEN_VER=xen-3.0"
+	.ascii	",VIRT_BASE=0x0" /* &_text from minios_x86_64.lds */
+	.ascii	",ELF_PADDR_OFFSET=0x0"
+	.ascii	",HYPERCALL_PAGE=0x2"
+	.ascii	",LOADER=generic"
+	.byte	0
+.text
+
+#define ENTRY(X) .globl X ; X :
+.globl _start, shared_info, hypercall_page
+
+
+_start:
+	cld
+	movq 
stack_start(%rip),%rsp + andq $(~(__STACK_SIZE-1)), %rsp + movq %rsi,%rdi + call arch_init + +stack_start: + .quad stack+(2*__STACK_SIZE) + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + + +#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg +#define XEN_PUT_VCPU_INFO(reg) +#define XEN_PUT_VCPU_INFO_fixup +#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) +#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) +#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) + +#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ + XEN_LOCKED_BLOCK_EVENTS(reg) ; \ + XEN_PUT_VCPU_INFO(reg) + +#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ + XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \ + XEN_PUT_VCPU_INFO(reg) + + +/* Offsets into shared_info_t. */ +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 1 + +NMI_MASK = 0x80000000 +KERNEL_CS_MASK = 0xfc + +#define RAX 80 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ +#define RIP 128 +#define CS 136 +#define RFLAGS 144 +#define RSP 152 + + +/* Macros */ +.macro zeroentry sym + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x10,%rsp /* skip rcx and r11 */ + pushq $0 /* push error code/oldrax */ + pushq %rax /* push real oldrax to the rdi slot */ + leaq \sym(%rip),%rax + jmp error_entry +.endm + +.macro errorentry sym + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x10,%rsp /* rsp points to the error code */ + pushq %rax + leaq \sym(%rip),%rax + jmp error_entry +.endm + +.macro RESTORE_ALL + movq (%rsp),%r11 + movq 1*8(%rsp),%r10 + movq 2*8(%rsp),%r9 + movq 3*8(%rsp),%r8 + movq 4*8(%rsp),%rax + movq 5*8(%rsp),%rcx + movq 6*8(%rsp),%rdx + movq 7*8(%rsp),%rsi + movq 8*8(%rsp),%rdi + addq $9*8+8,%rsp +.endm + +.macro RESTORE_REST + movq (%rsp),%r15 + movq 1*8(%rsp),%r14 + movq 2*8(%rsp),%r13 + movq 3*8(%rsp),%r12 + movq 4*8(%rsp),%rbp + movq 5*8(%rsp),%rbx + addq $6*8,%rsp +.endm + +.macro SAVE_REST + subq $6*8,%rsp + movq %rbx,5*8(%rsp) + movq %rbp,4*8(%rsp) + movq %r12,3*8(%rsp) + movq %r13,2*8(%rsp) + movq %r14,1*8(%rsp) + movq %r15,(%rsp) +.endm + +.macro HYPERVISOR_IRET flag + testl $NMI_MASK,2*8(%rsp) + jnz 2f + + /* Direct iret to kernel space. Correct CS and SS. */ + orb $3,1*8(%rsp) + orb $3,4*8(%rsp) + iretq + +2: /* Slow iret via hypervisor. */ + andl $~NMI_MASK, 16(%rsp) + pushq $\flag + jmp hypercall_page + (__HYPERVISOR_iret * 32) +.endm + + +/* + * Exception entry point. This expects an error code/orig_rax on the stack + * and the exception handler in %rax. + */ +ENTRY(error_entry) + /* rdi slot contains rax, oldrax contains error code */ + cld + subq $14*8,%rsp + movq %rsi,13*8(%rsp) + movq 14*8(%rsp),%rsi /* load rax from rdi slot */ + movq %rdx,12*8(%rsp) + movq %rcx,11*8(%rsp) + movq %rsi,10*8(%rsp) /* store rax */ + movq %r8, 9*8(%rsp) + movq %r9, 8*8(%rsp) + movq %r10,7*8(%rsp) + movq %r11,6*8(%rsp) + movq %rbx,5*8(%rsp) + movq %rbp,4*8(%rsp) + movq %r12,3*8(%rsp) + movq %r13,2*8(%rsp) + movq %r14,1*8(%rsp) + movq %r15,(%rsp) + +error_call_handler: + movq %rdi, RDI(%rsp) + movq %rsp,%rdi + movq ORIG_RAX(%rsp),%rsi # get error code + movq $-1,ORIG_RAX(%rsp) + call *%rax + jmp error_exit + + +/* + * Xen event (virtual interrupt) entry point. 
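+ *
+ * (Editorial sketch: zeroentry pushes a zero error code and funnels through
+ * error_entry above, so control flows roughly as
+ *
+ *     hypervisor_callback -> error_entry -> hypervisor_callback2(regs)
+ *
+ * with the pt_regs pointer arriving in %rdi per the normal calling
+ * convention, before do_hypervisor_callback is finally invoked.)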
+ */ +ENTRY(hypervisor_callback) + zeroentry hypervisor_callback2 + +ENTRY(hypervisor_callback2) + movq %rdi, %rsp + + /* check against event re-entrant */ + movq RIP(%rsp),%rax + cmpq $scrit,%rax + jb 11f + cmpq $ecrit,%rax + jb critical_region_fixup + +11: movq %gs:8,%rax + incl %gs:0 + cmovzq %rax,%rsp + pushq %rdi + call do_hypervisor_callback + popq %rsp + decl %gs:0 + +error_exit: +retint_kernel: + movl RFLAGS(%rsp), %eax + shr $9, %eax # EAX[0] == IRET_RFLAGS.IF + XEN_GET_VCPU_INFO(%rsi) + andb evtchn_upcall_mask(%rsi),%al + andb $1,%al # EAX[0] == IRET_RFLAGS.IF & event_mask + jnz restore_all_enable_events # != 0 => enable event delivery + XEN_PUT_VCPU_INFO(%rsi) + +retint_restore_args: + RESTORE_REST + RESTORE_ALL + HYPERVISOR_IRET 0 + +restore_all_enable_events: + RESTORE_REST + RESTORE_ALL + pushq %rax # save rax for it will be clobbered later + RSP_OFFSET=8 # record the stack frame layout changes + XEN_GET_VCPU_INFO(%rax) # safe to use rax since it is saved + XEN_UNBLOCK_EVENTS(%rax) + +scrit: /**** START OF CRITICAL REGION ****/ + XEN_TEST_PENDING(%rax) + jz 12f + XEN_LOCKED_BLOCK_EVENTS(%rax) # if pending, mask events and handle + # by jumping to hypervisor_prologue +12: popq %rax # all registers restored from this point + +restore_end: + jnz hypervisor_prologue # safe to jump out of critical region + # because events are masked if ZF = 0 + HYPERVISOR_IRET 0 +ecrit: /**** END OF CRITICAL REGION ****/ + +# Set up the stack as Xen does before calling event callback +hypervisor_prologue: + pushq %r11 + pushq %rcx + jmp hypervisor_callback + +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so if rax has been +# restored. We determine by comparing interrupted rip with "restore_end". +# We always copy all registers below RIP from the current stack frame +# to the end of the previous activation frame so that we can continue +# as if we've never even reached 11 running in the old activation frame. + +critical_region_fixup: + # Set up source and destination region pointers + leaq RIP(%rsp),%rsi # esi points at end of src region + # Acquire interrupted rsp which was saved-on-stack. This points to + # the end of dst region. Note that it is not necessarily current rsp + # plus 0xb0, because the second interrupt might align the stack frame. 
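+	# (Editorial illustration: the merge below behaves like
+	#     count = RIP / 8;        /* qwords below the saved RIP */
+	#     while ( count-- )
+	#         *--dst = *--src;
+	# so everything the current frame holds below the saved RIP lands at
+	# the end of the interrupted frame.)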
+ movq RSP(%rsp),%rdi # edi points at end of dst region + + cmpq $restore_end,%rax + jae 13f + + # If interrupted rip is before restore_end + # then rax hasn't been restored yet + movq (%rdi),%rax + movq %rax, RAX(%rsp) # save rax + addq $RSP_OFFSET,%rdi + + # Set up the copy +13: movq $RIP,%rcx + shr $3,%rcx # convert bytes into count of 64-bit entities +15: subq $8,%rsi # pre-decrementing copy loop + subq $8,%rdi + movq (%rsi),%rax + movq %rax,(%rdi) + loop 15b +16: movq %rdi,%rsp # final rdi is top of merged stack + andb $KERNEL_CS_MASK,CS(%rsp) # CS might have changed + jmp 11b + + + +ENTRY(failsafe_callback) + popq %rcx + popq %r11 + iretq + + +ENTRY(coprocessor_error) + zeroentry do_coprocessor_error + + +ENTRY(simd_coprocessor_error) + zeroentry do_simd_coprocessor_error + + +ENTRY(device_not_available) + zeroentry do_device_not_available + + +ENTRY(debug) + zeroentry do_debug + + +ENTRY(int3) + zeroentry do_int3 + +ENTRY(overflow) + zeroentry do_overflow + + +ENTRY(bounds) + zeroentry do_bounds + + +ENTRY(invalid_op) + zeroentry do_invalid_op + + +ENTRY(coprocessor_segment_overrun) + zeroentry do_coprocessor_segment_overrun + + +ENTRY(invalid_TSS) + errorentry do_invalid_TSS + + +ENTRY(segment_not_present) + errorentry do_segment_not_present + + +/* runs on exception stack */ +ENTRY(stack_segment) + errorentry do_stack_segment + + +ENTRY(general_protection) + errorentry do_general_protection + + +ENTRY(alignment_check) + errorentry do_alignment_check + + +ENTRY(divide_error) + zeroentry do_divide_error + + +ENTRY(spurious_interrupt_bug) + zeroentry do_spurious_interrupt_bug + + +ENTRY(page_fault) + errorentry do_page_fault + + + + + +ENTRY(thread_starter) + popq %rdi + popq %rbx + pushq $0 + xorq %rbp,%rbp + call *%rbx + call exit_thread + + +ENTRY(__arch_switch_threads) + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp, (%rdi) /* save ESP */ + movq (%rsi), %rsp /* restore ESP */ + movq $1f, 8(%rdi) /* save EIP */ + pushq 8(%rsi) /* restore EIP */ + ret +1: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + ret diff -Nru xen-4.6.0/extras/mini-os/blkfront.c xen-4.6.5/extras/mini-os/blkfront.c --- xen-4.6.0/extras/mini-os/blkfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/blkfront.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,736 @@ +/* Minimal block driver for Mini-OS. + * Copyright (c) 2007-2008 Samuel Thibault. + * Based on netfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_LIBC +#define strtoul simple_strtoul +#endif + +/* Note: we generally don't need to disable IRQs since we hardly do anything in + * the interrupt handler. */ + +/* Note: we really suppose non-preemptive threads. 
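+ * (Editorial note: i.e. the front ring is only ever touched from thread
+ * context, so requests can be queued without locks, along the lines of
+ *
+ *     i = dev->ring.req_prod_pvt++;
+ *     req = RING_GET_REQUEST(&dev->ring, i);
+ *     ... fill in *req ...
+ *     RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+ *
+ * relying only on the memory barriers inside the ring macros.)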
*/ + +DECLARE_WAIT_QUEUE_HEAD(blkfront_queue); + + + + +#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define GRANT_INVALID_REF 0 + + +struct blk_buffer { + void* page; + grant_ref_t gref; +}; + +struct blkfront_dev { + domid_t dom; + + struct blkif_front_ring ring; + grant_ref_t ring_ref; + evtchn_port_t evtchn; + blkif_vdev_t handle; + + char *nodename; + char *backend; + struct blkfront_info info; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct blkfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&blkfront_queue); +} + +static void free_blkfront(struct blkfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + gnttab_end_access(dev->ring_ref); + free_page(dev->ring.sring); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); + free(dev); +} + +struct blkfront_dev *init_blkfront(char *_nodename, struct blkfront_info *info) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct blkif_sring *s; + int retry=0; + char* msg = NULL; + char* c; + char* nodename = _nodename ? _nodename : "device/vbd/768"; + + struct blkfront_dev *dev; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + printk("******************* BLKFRONT for %s **********\n\n\n", nodename); + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn); + + s = (struct blkif_sring*) alloc_page(); + memset(s,0,PAGE_SIZE); + + + SHARED_RING_INIT(s); + FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE); + + dev->ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(s),0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "ring-ref","%u", + dev->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + dev->handle = strtoul(strrchr(nodename, '/')+1, NULL, 0); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/feature-flush-cache") + 1]; + snprintf(path, sizeof(path), "%s/mode", dev->backend); + msg = 
xenbus_read(XBT_NIL, path, &c); + if (msg) { + printk("Error %s when reading the mode\n", msg); + goto error; + } + if (*c == 'w') + dev->info.mode = O_RDWR; + else + dev->info.mode = O_RDONLY; + free(c); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + msg = NULL; + state = xenbus_read_integer(path); + while (msg == NULL && state < XenbusStateConnected) + msg = xenbus_wait_for_state_change(path, &state, &dev->events); + if (msg != NULL || state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + snprintf(path, sizeof(path), "%s/info", dev->backend); + dev->info.info = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sectors", dev->backend); + // FIXME: read_integer returns an int, so disk size limited to 1TB for now + dev->info.sectors = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sector-size", dev->backend); + dev->info.sector_size = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend); + dev->info.barrier = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend); + dev->info.flush = xenbus_read_integer(path); + + *info = dev->info; + } + unmask_evtchn(dev->evtchn); + + printk("%lu sectors of %u bytes\n", (unsigned long) dev->info.sectors, dev->info.sector_size); + printk("**************************\n"); + + return dev; + +error: + free(msg); + free(err); + free_blkfront(dev); + return NULL; +} + +void shutdown_blkfront(struct blkfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1]; + + blkfront_sync(dev); + + printk("close blk: backend=%s node=%s\n", dev->backend, dev->nodename); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_blkfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + 
free_blkfront(dev); +} + +static void blkfront_wait_slot(struct blkfront_dev *dev) +{ + /* Wait for a slot */ + if (RING_FULL(&dev->ring)) { + unsigned long flags; + DEFINE_WAIT(w); + local_irq_save(flags); + while (1) { + blkfront_aio_poll(dev); + if (!RING_FULL(&dev->ring)) + break; + /* Really no slot, go to sleep. */ + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); + } +} + +/* Issue an aio */ +void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + struct blkif_request *req; + RING_IDX i; + int notify; + int n, j; + uintptr_t start, end; + + // Can't io at non-sector-aligned location + ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1))); + // Can't io non-sector-sized amounts + ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1))); + // Can't io non-sector-aligned buffer + ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1))); + + start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK; + end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK; + aiocbp->n = n = (end - start) / PAGE_SIZE; + + /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB, + * so max 44KB can't happen */ + ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST); + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + + req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; + req->nr_segments = n; + req->handle = dev->handle; + req->id = (uintptr_t) aiocbp; + req->sector_number = aiocbp->aio_offset / 512; + + for (j = 0; j < n; j++) { + req->seg[j].first_sect = 0; + req->seg[j].last_sect = PAGE_SIZE / 512 - 1; + } + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512; + req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512; + for (j = 0; j < n; j++) { + uintptr_t data = start + j * PAGE_SIZE; + if (!write) { + /* Trigger CoW if needed */ + *(char*)(data + (req->seg[j].first_sect << 9)) = 0; + barrier(); + } + aiocbp->gref[j] = req->seg[j].gref = + gnttab_grant_access(dev->dom, virtual_to_mfn(data), write); + } + + dev->ring.req_prod_pvt = i + 1; + + wmb(); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + + if(notify) notify_remote_via_evtchn(dev->evtchn); +} + +static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) +{ + aiocbp->data = (void*) 1; + aiocbp->aio_cb = NULL; +} + +void blkfront_io(struct blkfront_aiocb *aiocbp, int write) +{ + unsigned long flags; + DEFINE_WAIT(w); + + ASSERT(!aiocbp->aio_cb); + aiocbp->aio_cb = blkfront_aio_cb; + blkfront_aio(aiocbp, write); + aiocbp->data = NULL; + + local_irq_save(flags); + while (1) { + blkfront_aio_poll(aiocbp->aio_dev); + if (aiocbp->data) + break; + + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); +} + +static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id) +{ + int i; + struct blkif_request *req; + int notify; + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + req->operation = op; + req->nr_segments = 0; + req->handle = dev->handle; + req->id = id; + /* Not needed anyway, but the backend will check it */ + req->sector_number = 0; + dev->ring.req_prod_pvt = i + 1; + wmb(); + 
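/* The wmb() above and the RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() that follows
 * form the usual Xen shared-ring producer handshake: the write barrier
 * publishes the request contents before the shared req_prod index is
 * advanced, and the macro compares the new producer index against the
 * backend's advertised req_event so that 'notify' is only set (and an
 * event-channel kick sent) when the backend may actually be waiting for an
 * event rather than still polling the ring. blkfront_aio() above uses the
 * same pairing for data requests. */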
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + if (notify) notify_remote_via_evtchn(dev->evtchn); +} + +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + blkfront_push_operation(dev, op, (uintptr_t) aiocbp); +} + +void blkfront_sync(struct blkfront_dev *dev) +{ + unsigned long flags; + DEFINE_WAIT(w); + + if (dev->info.mode == O_RDWR) { + if (dev->info.barrier == 1) + blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0); + + if (dev->info.flush == 1) + blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0); + } + + /* Note: This won't finish if another thread enqueues requests. */ + local_irq_save(flags); + while (1) { + blkfront_aio_poll(dev); + if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring)) + break; + + add_waiter(w, blkfront_queue); + local_irq_restore(flags); + schedule(); + local_irq_save(flags); + } + remove_waiter(w, blkfront_queue); + local_irq_restore(flags); +} + +int blkfront_aio_poll(struct blkfront_dev *dev) +{ + RING_IDX rp, cons; + struct blkif_response *rsp; + int more; + int nr_consumed; + +moretodo: +#ifdef HAVE_LIBC + if (dev->fd != -1) { + files[dev->fd].read = 0; + mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ + } +#endif + + rp = dev->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + cons = dev->ring.rsp_cons; + + nr_consumed = 0; + while ((cons != rp)) + { + struct blkfront_aiocb *aiocbp; + int status; + + rsp = RING_GET_RESPONSE(&dev->ring, cons); + nr_consumed++; + + aiocbp = (void*) (uintptr_t) rsp->id; + status = rsp->status; + + switch (rsp->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + { + int j; + + if (status != BLKIF_RSP_OKAY) + printk("%s error %d on %s at offset %llu, num bytes %llu\n", + rsp->operation == BLKIF_OP_READ?"read":"write", + status, aiocbp->aio_dev->nodename, + (unsigned long long) aiocbp->aio_offset, + (unsigned long long) aiocbp->aio_nbytes); + + for (j = 0; j < aiocbp->n; j++) + gnttab_end_access(aiocbp->gref[j]); + + break; + } + + case BLKIF_OP_WRITE_BARRIER: + if (status != BLKIF_RSP_OKAY) + printk("write barrier error %d\n", status); + break; + case BLKIF_OP_FLUSH_DISKCACHE: + if (status != BLKIF_RSP_OKAY) + printk("flush error %d\n", status); + break; + + default: + printk("unrecognized block operation %d response (status %d)\n", rsp->operation, status); + break; + } + + dev->ring.rsp_cons = ++cons; + /* Nota: callback frees aiocbp itself */ + if (aiocbp && aiocbp->aio_cb) + aiocbp->aio_cb(aiocbp, status ? 
-EIO : 0); + if (dev->ring.rsp_cons != cons) + /* We reentered, we must not continue here */ + break; + } + + RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); + if (more) goto moretodo; + + return nr_consumed; +} + +#ifdef HAVE_LIBC +int blkfront_open(struct blkfront_dev *dev) +{ + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } + dev->fd = alloc_fd(FTYPE_BLK); + printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].blk.dev = dev; + files[dev->fd].blk.offset = 0; + return dev->fd; +} + +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + off_t offset = files[fd].blk.offset; + struct blkfront_aiocb aiocb; + unsigned long long disksize = dev->info.sectors * dev->info.sector_size; + unsigned int blocksize = dev->info.sector_size; + + int blknum; + int blkoff; + size_t bytes; + int rc = 0; + int alignedbuf = 0; + uint8_t* copybuf = NULL; + + /* RW 0 bytes is just a NOP */ + if(count == 0) { + return 0; + } + /* Check for NULL buffer */ + if( buf == NULL ) { + errno = EFAULT; + return -1; + } + + /* Write mode checks */ + if(write) { + /*Make sure we have write permission */ + if(dev->info.info & VDISK_READONLY + || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) { + errno = EACCES; + return -1; + } + /*Make sure disk is big enough for this write */ + if(offset + count > disksize) { + errno = ENOSPC; + return -1; + } + } + /* Read mode checks */ + else + { + /* Reading past the disk? Just return 0 */ + if(offset >= disksize) { + return 0; + } + + /*If the requested read is bigger than the disk, just + * read as much as we can until the end */ + if(offset + count > disksize) { + count = disksize - offset; + } + } + /* Determine which block to start at and at which offset inside of it */ + blknum = offset / blocksize; + blkoff = offset % blocksize; + + /* Optimization: We need to check if buf is aligned to the sector size. + * This is somewhat tricky code. We have to add the blocksize - block offset + * because the first block may be a partial block and then for every subsequent + * block rw the buffer will be offset.*/ + if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) { + alignedbuf = 1; + } + + /* Setup aiocb block object */ + aiocb.aio_dev = dev; + aiocb.aio_offset = blknum * blocksize; + aiocb.aio_cb = NULL; + aiocb.data = NULL; + + /* If our buffer is unaligned or its aligned but we will need to rw a partial block + * then a copy will have to be done */ + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { + copybuf = _xmalloc(blocksize, dev->info.sector_size); + } + + rc = count; + while(count > 0) { + /* determine how many bytes to read/write from/to the current block buffer */ + if(!alignedbuf || blkoff != 0 || count < blocksize) { + /* This is the case for unaligned R/W or partial block */ + bytes = count < blocksize - blkoff ? count : blocksize - blkoff; + aiocb.aio_nbytes = blocksize; + } else { + /* We can optimize further if buffer is page aligned */ + int not_page_aligned = 0; + if(((uintptr_t)buf) & (PAGE_SIZE -1)) { + not_page_aligned = 1; + } + + /* For an aligned R/W we can read up to the maximum transfer size */ + bytes = count > (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + ? 
(BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + : count & ~(blocksize -1); + aiocb.aio_nbytes = bytes; + } + + /* read operation */ + if(!write) { + if (alignedbuf && bytes >= blocksize) { + /* If aligned and we're reading a whole block, just read right into buf */ + aiocb.aio_buf = buf; + blkfront_read(&aiocb); + } else { + /* If not then we have to do a copy */ + aiocb.aio_buf = copybuf; + blkfront_read(&aiocb); + memcpy(buf, &copybuf[blkoff], bytes); + } + } + /* Write operation */ + else { + if(alignedbuf && bytes >= blocksize) { + /* If aligned and we're writing a whole block, just write directly from buf */ + aiocb.aio_buf = buf; + blkfront_write(&aiocb); + } else { + /* If not then we have to do a copy. */ + aiocb.aio_buf = copybuf; + /* If we're writing a partial block, we need to read the current contents first + * so we don't overwrite the extra bits with garbage */ + if(blkoff != 0 || bytes < blocksize) { + blkfront_read(&aiocb); + } + memcpy(&copybuf[blkoff], buf, bytes); + blkfront_write(&aiocb); + } + } + /* Will start at beginning of all remaining blocks */ + blkoff = 0; + + /* Increment counters and continue */ + count -= bytes; + buf += bytes; + if(bytes < blocksize) { + //At minimum we read one block + aiocb.aio_offset += blocksize; + } else { + //If we read more than a block, it was a multiple of blocksize + aiocb.aio_offset += bytes; + } + } + + free(copybuf); + files[fd].blk.offset += rc; + return rc; + +} + +int blkfront_posix_fstat(int fd, struct stat* buf) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + + buf->st_mode = dev->info.mode; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->info.sectors * dev->info.sector_size; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} +#endif diff -Nru xen-4.6.0/extras/mini-os/config/arm32.mk xen-4.6.5/extras/mini-os/config/arm32.mk --- xen-4.6.0/extras/mini-os/config/arm32.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/arm32.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,22 @@ +CONFIG_ARM := y +CONFIG_ARM_32 := y +CONFIG_ARM_$(XEN_OS) := y + +CONFIG_XEN_INSTALL_SUFFIX := + +# -march= -mcpu= + +# Explicitly specify 32-bit ARM ISA since toolchain default can be -mthumb: +CFLAGS += -marm + +HAS_PL011 := y +HAS_EXYNOS4210 := y +HAS_OMAP := y +HAS_NS16550 := y + +# Use only if calling $(LD) directly. +LDFLAGS_DIRECT += -EL + +CONFIG_LOAD_ADDRESS ?= 0x80000000 + +IOEMU_CPU_ARCH ?= arm diff -Nru xen-4.6.0/extras/mini-os/config/arm64.mk xen-4.6.5/extras/mini-os/config/arm64.mk --- xen-4.6.0/extras/mini-os/config/arm64.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/arm64.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,19 @@ +CONFIG_ARM := y +CONFIG_ARM_64 := y +CONFIG_ARM_$(XEN_OS) := y + +CONFIG_XEN_INSTALL_SUFFIX := + +CFLAGS += #-marm -march= -mcpu= etc + +HAS_PL011 := y +HAS_NS16550 := y + +# Use only if calling $(LD) directly. 
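# (-EL requests little-endian output from a bare $(LD) invocation; links
# driven through $(CC) normally derive the endianness from the compiler
# flags instead, which is why the flag is restricted to direct $(LD) use
# here and in arm32.mk above.)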
+LDFLAGS_DIRECT += -EL + +CONFIG_LOAD_ADDRESS ?= 0x80000000 + +IOEMU_CPU_ARCH ?= aarch64 + +EFI_DIR ?= /usr/lib64/efi diff -Nru xen-4.6.0/extras/mini-os/config/MiniOS.mk xen-4.6.5/extras/mini-os/config/MiniOS.mk --- xen-4.6.0/extras/mini-os/config/MiniOS.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/MiniOS.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,10 @@ +include $(MINIOS_ROOT)/config/StdGNU.mk +include $(MINIOS_ROOT)/Config.mk +CFLAGS += $(DEF_CFLAGS) $(ARCH_CFLAGS) +CPPFLAGS += $(DEF_CPPFLAGS) $(ARCH_CPPFLAGS) $(extra_incl) +ASFLAGS += $(DEF_ASFLAGS) $(ARCH_ASFLAGS) +LDFLAGS += $(DEF_LDFLAGS) $(ARCH_LDFLAGS) + +# Override settings for this OS +PTHREAD_LIBS = +nosharedlibs=y diff -Nru xen-4.6.0/extras/mini-os/config/StdGNU.mk xen-4.6.5/extras/mini-os/config/StdGNU.mk --- xen-4.6.0/extras/mini-os/config/StdGNU.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/StdGNU.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,47 @@ +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +ifeq ($(clang),y) +CC = $(CROSS_COMPILE)clang +LD_LTO = $(CROSS_COMPILE)llvm-ld +else +CC = $(CROSS_COMPILE)gcc +LD_LTO = $(CROSS_COMPILE)ld +endif +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +RANLIB = $(CROSS_COMPILE)ranlib +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +SIZEUTIL = $(CROSS_COMPILE)size + +# Allow git to be wrappered in the environment +GIT ?= git + +INSTALL = install +INSTALL_DIR = $(INSTALL) -d -m0755 -p +INSTALL_DATA = $(INSTALL) -m0644 -p +INSTALL_PROG = $(INSTALL) -m0755 -p + +BOOT_DIR ?= /boot + +SOCKET_LIBS = +UTIL_LIBS = -lutil +DLOPEN_LIBS = -ldl + +SONAME_LDFLAG = -soname +SHLIB_LDFLAGS = -shared + +ifneq ($(debug),y) +CFLAGS += -O2 -fomit-frame-pointer +else +# Less than -O1 produces bad code and large stack frames +CFLAGS += -O1 -fno-omit-frame-pointer +CFLAGS-$(gcc) += -fno-optimize-sibling-calls +endif + +ifeq ($(lto),y) +CFLAGS += -flto +LDFLAGS-$(clang) += -plugin LLVMgold.so +endif diff -Nru xen-4.6.0/extras/mini-os/config/x86_32.mk xen-4.6.5/extras/mini-os/config/x86_32.mk --- xen-4.6.0/extras/mini-os/config/x86_32.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/x86_32.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,20 @@ +CONFIG_X86 := y +CONFIG_X86_32 := y +CONFIG_X86_$(XEN_OS) := y + +CONFIG_HVM := y +CONFIG_MIGRATE := y +CONFIG_XCUTILS := y + +HAS_MEM_ACCESS := y +HAS_MEM_PAGING := y +HAS_MEM_SHARING := y + +CFLAGS += -m32 -march=i686 + +# Use only if calling $(LD) directly. +LDFLAGS_DIRECT_OpenBSD = _obsd +LDFLAGS_DIRECT_FreeBSD = _fbsd +LDFLAGS_DIRECT += -melf_i386$(LDFLAGS_DIRECT_$(XEN_OS)) + +IOEMU_CPU_ARCH ?= i386 diff -Nru xen-4.6.0/extras/mini-os/config/x86_64.mk xen-4.6.5/extras/mini-os/config/x86_64.mk --- xen-4.6.0/extras/mini-os/config/x86_64.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/config/x86_64.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,33 @@ +CONFIG_X86 := y +CONFIG_X86_64 := y +CONFIG_X86_$(XEN_OS) := y + +CONFIG_COMPAT := y +CONFIG_HVM := y +CONFIG_MIGRATE := y +CONFIG_XCUTILS := y + +HAS_MEM_ACCESS := y +HAS_MEM_PAGING := y +HAS_MEM_SHARING := y + +CONFIG_XEN_INSTALL_SUFFIX := .gz + +CFLAGS += -m64 + +SunOS_LIBDIR = $(SunOS_LIBDIR_x86_64) + +EFI_DIR ?= /usr/lib64/efi + +# Use only if calling $(LD) directly. 
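# (The conditional below selects the ld emulation whose name matches the
# target OS flavour: plain -melf_x86_64 in the default case, the _obsd and
# _fbsd variants for OpenBSD and FreeBSD, mirroring the suffix trick
# x86_32.mk plays with LDFLAGS_DIRECT_$(XEN_OS) above.)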
+ifeq ($(XEN_OS),OpenBSD) +LDFLAGS_DIRECT += -melf_x86_64_obsd +else +ifeq ($(XEN_OS),FreeBSD) +LDFLAGS_DIRECT += -melf_x86_64_fbsd +else +LDFLAGS_DIRECT += -melf_x86_64 +endif +endif + +IOEMU_CPU_ARCH ?= x86_64 diff -Nru xen-4.6.0/extras/mini-os/Config.mk xen-4.6.5/extras/mini-os/Config.mk --- xen-4.6.0/extras/mini-os/Config.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/Config.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,99 @@ +# +# Compare $(1) and $(2) and replace $(2) with $(1) if they differ +# +# Typically $(1) is a newly generated file and $(2) is the target file +# being regenerated. This prevents changing the timestamp of $(2) only +# due to being auto regenereated with the same contents. +define move-if-changed + if ! cmp -s $(1) $(2); then mv -f $(1) $(2); else rm -f $(1); fi +endef + +# cc-option: Check if compiler supports first option, else fall back to second. +# +# This is complicated by the fact that unrecognised -Wno-* options: +# (a) are ignored unless the compilation emits a warning; and +# (b) even then produce a warning rather than an error +# To handle this we do a test compile, passing the option-under-test, on a code +# fragment that will always produce a warning (integer assigned to pointer). +# We then grep for the option-under-test in the compiler's output, the presence +# of which would indicate an "unrecognized command-line option" warning/error. +# +# Usage: cflags-y += $(call cc-option,$(CC),-march=winchip-c6,-march=i586) +cc-option = $(shell if test -z "`echo 'void*p=1;' | \ + $(1) $(2) -S -o /dev/null -x c - 2>&1 | grep -- $(2) -`"; \ + then echo "$(2)"; else echo "$(3)"; fi ;) + +# Compatibility with Xen's stubdom build environment. If we are building +# stubdom, some XEN_ variables are set, set MINIOS_ variables accordingly. +# +ifneq ($(XEN_ROOT),) +MINIOS_ROOT=$(XEN_ROOT)/extras/mini-os +else +MINIOS_ROOT=$(TOPLEVEL_DIR) +endif +export MINIOS_ROOT + +ifneq ($(XEN_TARGET_ARCH),) +MINIOS_TARGET_ARCH = $(XEN_TARGET_ARCH) +else +MINIOS_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ + -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \ + -e s/armv7.*/arm32/ -e s/armv8.*/arm64/ \ + -e s/aarch64/arm64/) + +MINIOS_TARGET_ARCH ?= $(MINIOS_COMPILE_ARCH) +endif + +libc = $(stubdom) + +XEN_INTERFACE_VERSION := 0x00030205 +export XEN_INTERFACE_VERSION + +# Try to find out the architecture family TARGET_ARCH_FAM. +# First check whether x86_... is contained (for x86_32, x86_32y, x86_64). +# If not x86 then use $(MINIOS_TARGET_ARCH) +ifeq ($(findstring x86_,$(MINIOS_TARGET_ARCH)),x86_) +TARGET_ARCH_FAM = x86 +else +TARGET_ARCH_FAM = $(MINIOS_TARGET_ARCH) +endif + +# The architecture family directory below mini-os. +TARGET_ARCH_DIR := arch/$(TARGET_ARCH_FAM) + +# Export these variables for possible use in architecture dependent makefiles. +export TARGET_ARCH_DIR +export TARGET_ARCH_FAM + +# This is used for architecture specific links. +# This can be overwritten from arch specific rules. +ARCH_LINKS = + +# The path pointing to the architecture specific header files. +ARCH_INC := $(TARGET_ARCH_FAM) + +# For possible special header directories. +# This can be overwritten from arch specific rules. +EXTRA_INC = $(ARCH_INC) + +# Include the architecture family's special makerules. +# This must be before include minios.mk! 
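# (arch.mk, included below, is expected to define the per-family ARCH_CFLAGS,
# ARCH_ASFLAGS and ARCH_LDFLAGS variables that config/MiniOS.mk folds into
# CFLAGS/ASFLAGS/LDFLAGS, hence the ordering constraint noted above.)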
+include $(MINIOS_ROOT)/$(TARGET_ARCH_DIR)/arch.mk + +extra_incl := $(foreach dir,$(EXTRA_INC),-isystem $(MINIOS_ROOT)/include/$(dir)) + +DEF_CPPFLAGS += -isystem $(MINIOS_ROOT)/include +DEF_CPPFLAGS += -D__MINIOS__ + +ifeq ($(libc),y) +DEF_CPPFLAGS += -DHAVE_LIBC +DEF_CPPFLAGS += -isystem $(MINIOS_ROOT)/include/posix +DEF_CPPFLAGS += -isystem $(XEN_ROOT)/tools/xenstore/include +endif + +ifneq ($(LWIPDIR),) +lwip=y +DEF_CPPFLAGS += -DHAVE_LWIP +DEF_CPPFLAGS += -isystem $(LWIPDIR)/src/include +DEF_CPPFLAGS += -isystem $(LWIPDIR)/src/include/ipv4 +endif diff -Nru xen-4.6.0/extras/mini-os/console/console.c xen-4.6.5/extras/mini-os/console/console.c --- xen-4.6.0/extras/mini-os/console/console.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/console/console.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,164 @@ +/* + **************************************************************************** + * (C) 2006 - Grzegorz Milos - Cambridge University + **************************************************************************** + * + * File: console.h + * Author: Grzegorz Milos + * Changes: + * + * Date: Mar 2006 + * + * Environment: Xen Minimal OS + * Description: Console interface. + * + * Handles console I/O. Defines printk. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Copies all print output to the Xen emergency console apart + of standard dom0 handled console */ +#define USE_XEN_CONSOLE + + +/* If console not initialised the printk will be sent to xen serial line + NOTE: you need to enable verbose in xen/Rules.mk for it to work. 
*/ +static int console_initialised = 0; + +__attribute__((weak)) void console_input(char * buf, unsigned len) +{ + if(len > 0) + { + /* Just repeat what's written */ + buf[len] = '\0'; + printk("%s", buf); + + if(buf[len-1] == '\r') + printk("\nNo console input handler.\n"); + } +} + +#ifndef HAVE_LIBC +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) +{ + console_input(buf, len); +} + +void xencons_tx(void) +{ + /* Do nothing, handled by _rx */ +} +#endif + + +void console_print(struct consfront_dev *dev, char *data, int length) +{ + char *curr_char, saved_char; + char copied_str[length+1]; + char *copied_ptr; + int part_len; + int (*ring_send_fn)(struct consfront_dev *dev, const char *data, unsigned length); + + if(!console_initialised) + ring_send_fn = xencons_ring_send_no_notify; + else + ring_send_fn = xencons_ring_send; + + copied_ptr = copied_str; + memcpy(copied_ptr, data, length); + for(curr_char = copied_ptr; curr_char < copied_ptr+length-1; curr_char++) + { + if(*curr_char == '\n') + { + *curr_char = '\r'; + saved_char = *(curr_char+1); + *(curr_char+1) = '\n'; + part_len = curr_char - copied_ptr + 2; + ring_send_fn(dev, copied_ptr, part_len); + *(curr_char+1) = saved_char; + copied_ptr = curr_char+1; + length -= part_len - 1; + } + } + + if (copied_ptr[length-1] == '\n') { + copied_ptr[length-1] = '\r'; + copied_ptr[length] = '\n'; + length++; + } + + ring_send_fn(dev, copied_ptr, length); +} + +void print(int direct, const char *fmt, va_list args) +{ + static char buf[1024]; + + (void)vsnprintf(buf, sizeof(buf), fmt, args); + + if(direct) + { + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf); + return; + } else { +#ifndef USE_XEN_CONSOLE + if(!console_initialised) +#endif + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf); + + console_print(NULL, buf, strlen(buf)); + } +} + +void printk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + print(0, fmt, args); + va_end(args); +} + +void xprintk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + print(1, fmt, args); + va_end(args); +} +void init_console(void) +{ + printk("Initialising console ... 
"); + xencons_ring_init(); + console_initialised = 1; + /* This is also required to notify the daemon */ + printk("done.\n"); +} diff -Nru xen-4.6.0/extras/mini-os/console/console.h xen-4.6.5/extras/mini-os/console/console.h --- xen-4.6.0/extras/mini-os/console/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/console/console.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,2 @@ + +void console_handle_input(evtchn_port_t port, struct pt_regs *regs, void *data); diff -Nru xen-4.6.0/extras/mini-os/console/xenbus.c xen-4.6.5/extras/mini-os/console/xenbus.c --- xen-4.6.0/extras/mini-os/console/xenbus.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/console/xenbus.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "console.h" + +void free_consfront(struct consfront_dev *dev) +{ + char* err = NULL; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/state") + 1]; + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("free_consfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("free_consfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + +close: + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err); + + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); + free(dev->backend); + free(dev->nodename); + + gnttab_end_access(dev->ring_ref); + + free_page(dev->ring); + free(dev); +} + +struct consfront_dev *init_consfront(char *_nodename) +{ + xenbus_transaction_t xbt; + char* err = NULL; + char* message=NULL; + int retry=0; + char* msg = NULL; + char nodename[256]; + char path[256]; + static int consfrontends = 3; + struct consfront_dev *dev; + int res; + + if (!_nodename) + snprintf(nodename, sizeof(nodename), "device/console/%d", consfrontends); + else { + strncpy(nodename, _nodename, sizeof(nodename) - 1); + nodename[sizeof(nodename) - 1] = 0; + } + + printk("******************* CONSFRONT for %s **********\n\n\n", nodename); + + consfrontends++; + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + if ((res = xenbus_read_integer(path)) < 0) + goto error; + else + dev->dom = res; + evtchn_alloc_unbound(dev->dom, console_handle_input, dev, &dev->evtchn); + + dev->ring = (struct xencons_interface *) alloc_page(); + memset(dev->ring, 0, PAGE_SIZE); + dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(dev->ring), 0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "ring-ref","%u", + dev->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "port", "%u", 
dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/state") + 1]; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + msg = NULL; + state = xenbus_read_integer(path); + while (msg == NULL && state < XenbusStateConnected) + msg = xenbus_wait_for_state_change(path, &state, &dev->events); + if (msg != NULL || state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("**************************\n"); + + return dev; + +error: + free(msg); + free(err); + free_consfront(dev); + return NULL; +} + +void fini_console(struct consfront_dev *dev) +{ + if (dev) free_consfront(dev); +} + diff -Nru xen-4.6.0/extras/mini-os/console/xencons_ring.c xen-4.6.5/extras/mini-os/console/xencons_ring.c --- xen-4.6.0/extras/mini-os/console/xencons_ring.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/console/xencons_ring.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "console.h" + +DECLARE_WAIT_QUEUE_HEAD(console_queue); + +static inline void notify_daemon(struct consfront_dev *dev) +{ + /* Use evtchn: this is called early, before irq is set up. 
*/ + if (!dev) + notify_remote_via_evtchn(start_info.console.domU.evtchn); + else + notify_remote_via_evtchn(dev->evtchn); +} + +static inline struct xencons_interface *xencons_interface(void) +{ + if (start_info.console.domU.evtchn) + return mfn_to_virt(start_info.console.domU.mfn); + else + return NULL; +} + +int xencons_ring_send_no_notify(struct consfront_dev *dev, const char *data, unsigned len) +{ + int sent = 0; + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + if (!intf) + return sent; + + cons = intf->out_cons; + prod = intf->out_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); + intf->out_prod = prod; + + return sent; +} + +int xencons_ring_send(struct consfront_dev *dev, const char *data, unsigned len) +{ + int sent; + + sent = xencons_ring_send_no_notify(dev, data, len); + notify_daemon(dev); + + return sent; +} + + + +void console_handle_input(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + struct consfront_dev *dev = (struct consfront_dev *) data; +#ifdef HAVE_LIBC + int fd = dev ? dev->fd : -1; + + if (fd != -1) + files[fd].read = 1; + + wake_up(&console_queue); +#else + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (cons != prod) { + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); + cons++; + } + + mb(); + intf->in_cons = cons; + + notify_daemon(dev); + + xencons_tx(); +#endif +} + +#ifdef HAVE_LIBC +int xencons_ring_avail(struct consfront_dev *dev) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + return prod - cons; +} + +int xencons_ring_recv(struct consfront_dev *dev, char *data, unsigned len) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + unsigned filled = 0; + + if (!dev) + intf = xencons_interface(); + else + intf = dev->ring; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (filled < len && cons + filled != prod) { + data[filled] = *(intf->in + MASK_XENCONS_IDX(cons + filled, intf->in)); + filled++; + } + + mb(); + intf->in_cons = cons + filled; + + notify_daemon(dev); + + return filled; +} +#endif + +struct consfront_dev *xencons_ring_init(void) +{ + int err; + struct consfront_dev *dev; + + if (!start_info.console.domU.evtchn) + return 0; + + dev = malloc(sizeof(struct consfront_dev)); + memset(dev, 0, sizeof(struct consfront_dev)); + dev->nodename = "device/console"; + dev->dom = 0; + dev->backend = 0; + dev->ring_ref = 0; + +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + dev->evtchn = start_info.console.domU.evtchn; + dev->ring = (struct xencons_interface *) mfn_to_virt(start_info.console.domU.mfn); + + err = bind_evtchn(dev->evtchn, console_handle_input, dev); + if (err <= 0) { + printk("XEN console request chn bind failed %i\n", err); + free(dev); + return NULL; + } + unmask_evtchn(dev->evtchn); + + /* In case we have in-flight data after save/restore... 
*/ + notify_daemon(dev); + + return dev; +} + +void xencons_resume(void) +{ + (void)xencons_ring_init(); +} + diff -Nru xen-4.6.0/extras/mini-os/COPYING xen-4.6.5/extras/mini-os/COPYING --- xen-4.6.0/extras/mini-os/COPYING 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/COPYING 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,36 @@ +Certain files in this directory are licensed by the GNU +General Public License version 2 (GPLv2). By default these +files are not built and linked into MiniOs. Enabling them +will cause the whole work to become covered by the GPLv2. + +The current set of GPLv2 features are: +CONFIG_TPMFRONT +CONFIG_TPMBACK +CONFIG_TPM_TIS + +Do not use these if you do not want your MiniOS build to become +GPL licensed! + +Copyright (c) 2009 Citrix Systems, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
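The daytime.c service added below sizes its static message buffer to exactly fit its fixed-width format string: "%20lu" prints 20 characters, the dot one, "%6.6lu" six, the newline one, and the terminating NUL brings the total to the declared 29 bytes. A standalone sketch (illustrative only, not part of the patch) that checks this arithmetic:

    #include <assert.h>
    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        char message[29];
        /* Worst case: a full-width seconds value, matching daytime.c's
         * sprintf(message, "%20lu.%6.6lu\n", tv.tv_sec, tv.tv_usec). */
        int n = snprintf(message, sizeof(message), "%20lu.%6.6lu\n",
                         ULONG_MAX, 999999UL);
        assert(n == 28);   /* 20 + 1 + 6 + 1 characters, NUL excluded */
        return 0;
    }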
+ diff -Nru xen-4.6.0/extras/mini-os/daytime.c xen-4.6.5/extras/mini-os/daytime.c --- xen-4.6.0/extras/mini-os/daytime.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/daytime.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,67 @@ +/* + * daytime.c: a simple network service based on lwIP and mini-os + * + * Tim Deegan , July 2007 + */ + +#include +#include +#include +#include +#include + +static char message[29]; + +void run_server(void *p) +{ + struct ip_addr listenaddr = { 0 }; + struct netconn *listener; + struct netconn *session; + struct timeval tv; + err_t rc; + + start_networking(); + + if (0) { + struct ip_addr ipaddr = { htonl(0x0a000001) }; + struct ip_addr netmask = { htonl(0xff000000) }; + struct ip_addr gw = { 0 }; + networking_set_addr(&ipaddr, &netmask, &gw); + } + + tprintk("Opening connection\n"); + + listener = netconn_new(NETCONN_TCP); + tprintk("Connection at %p\n", listener); + + rc = netconn_bind(listener, &listenaddr, 13); + if (rc != ERR_OK) { + tprintk("Failed to bind connection: %i\n", rc); + return; + } + + rc = netconn_listen(listener); + if (rc != ERR_OK) { + tprintk("Failed to listen on connection: %i\n", rc); + return; + } + + while (1) { + session = netconn_accept(listener); + if (session == NULL) + continue; + + gettimeofday(&tv, NULL); + sprintf(message, "%20lu.%6.6lu\n", tv.tv_sec, tv.tv_usec); + (void) netconn_write(session, message, strlen(message), NETCONN_COPY); + (void) netconn_disconnect(session); + (void) netconn_delete(session); + } +} + + +int app_main(start_info_t *si) +{ + create_thread("server", run_server, NULL); + return 0; +} diff -Nru xen-4.6.0/extras/mini-os/domain_config xen-4.6.5/extras/mini-os/domain_config --- xen-4.6.0/extras/mini-os/domain_config 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/domain_config 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,19 @@ +# -*- mode: python; -*- +#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. +# You use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +#---------------------------------------------------------------------------- +# Kernel image file. +kernel = "mini-os.gz" + +# Initial memory allocation (in megabytes) for the new domain. +memory = 32 + +# A name for your domain. All domains must have different names. 
+name = "Mini-OS" + +on_crash = 'destroy' diff -Nru xen-4.6.0/extras/mini-os/events.c xen-4.6.5/extras/mini-os/events.c --- xen-4.6.0/extras/mini-os/events.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/events.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,269 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: events.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with events recieved on event channels + * + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include + +#define NR_EVS 1024 + +/* this represents a event handler. Chaining or sharing is not allowed */ +typedef struct _ev_action_t { + evtchn_handler_t handler; + void *data; + uint32_t count; +} ev_action_t; + +static ev_action_t ev_actions[NR_EVS]; +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *data); + +static unsigned long bound_ports[NR_EVS/(8*sizeof(unsigned long))]; + +void unbind_all_ports(void) +{ + int i; + int cpu = 0; + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + for ( i = 0; i < NR_EVS; i++ ) + { + if ( i == start_info.console.domU.evtchn || + i == start_info.store_evtchn) + continue; + + if ( test_and_clear_bit(i, bound_ports) ) + { + printk("port %d still bound!\n", i); + unbind_evtchn(i); + } + } + vcpu_info->evtchn_upcall_pending = 0; + vcpu_info->evtchn_pending_sel = 0; +} + +/* + * Demux events to different handlers. + */ +int do_event(evtchn_port_t port, struct pt_regs *regs) +{ + ev_action_t *action; + + clear_evtchn(port); + + if ( port >= NR_EVS ) + { + printk("WARN: do_event(): Port number too large: %d\n", port); + return 1; + } + + action = &ev_actions[port]; + action->count++; + + /* call the handler */ + action->handler(port, regs, action->data); + + return 1; + +} + +evtchn_port_t bind_evtchn(evtchn_port_t port, evtchn_handler_t handler, + void *data) +{ + if ( ev_actions[port].handler != default_handler ) + printk("WARN: Handler for port %d already registered, replacing\n", + port); + + ev_actions[port].data = data; + wmb(); + ev_actions[port].handler = handler; + set_bit(port, bound_ports); + + return port; +} + +void unbind_evtchn(evtchn_port_t port ) +{ + struct evtchn_close close; + int rc; + + if ( ev_actions[port].handler == default_handler ) + printk("WARN: No handler for port %d when unbinding\n", port); + mask_evtchn(port); + clear_evtchn(port); + + ev_actions[port].handler = default_handler; + wmb(); + ev_actions[port].data = NULL; + clear_bit(port, bound_ports); + + close.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if ( rc ) + printk("WARN: close_port %d failed rc=%d. 
ignored\n", port, rc); +} + +evtchn_port_t bind_virq(uint32_t virq, evtchn_handler_t handler, void *data) +{ + evtchn_bind_virq_t op; + int rc; + + /* Try to bind the virq to a port */ + op.virq = virq; + op.vcpu = smp_processor_id(); + + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op); + if (rc != 0) + { + printk("Failed to bind virtual IRQ %d with rc=%d\n", virq, rc); + return -1; + } + bind_evtchn(op.port, handler, data); + return op.port; +} + +evtchn_port_t bind_pirq(uint32_t pirq, int will_share, + evtchn_handler_t handler, void *data) +{ + evtchn_bind_pirq_t op; + int rc; + + /* Try to bind the pirq to a port */ + op.pirq = pirq; + op.flags = will_share ? BIND_PIRQ__WILL_SHARE : 0; + + if ( (rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &op)) != 0 ) + { + printk("Failed to bind physical IRQ %d with rc=%d\n", pirq, rc); + return -1; + } + bind_evtchn(op.port, handler, data); + return op.port; +} + +/* + * Initially all events are without a handler and disabled + */ +void init_events(void) +{ + int i; + + /* initialize event handler */ + for ( i = 0; i < NR_EVS; i++ ) + { + ev_actions[i].handler = default_handler; + mask_evtchn(i); + } + + arch_init_events(); +} + +void fini_events(void) +{ + /* Dealloc all events */ + arch_unbind_ports(); + unbind_all_ports(); + arch_fini_events(); +} + +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *ignore) +{ + printk("[Port %d] - event received\n", port); +} + +/* Create a port available to the pal for exchanging notifications. + Returns the result of the hypervisor call. */ + +/* Unfortunate confusion of terminology: the port is unbound as far + as Xen is concerned, but we automatically bind a handler to it + from inside mini-os. */ + +int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler, + void *data, evtchn_port_t *port) +{ + int rc; + + evtchn_alloc_unbound_t op; + op.dom = DOMID_SELF; + op.remote_dom = pal; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); + if ( rc ) + { + printk("ERROR: alloc_unbound failed with rc=%d", rc); + return rc; + } + *port = bind_evtchn(op.port, handler, data); + return rc; +} + +/* Connect to a port so as to allow the exchange of notifications with + the pal. Returns the result of the hypervisor call. 
*/ + +int evtchn_bind_interdomain(domid_t pal, evtchn_port_t remote_port, + evtchn_handler_t handler, void *data, + evtchn_port_t *local_port) +{ + int rc; + evtchn_port_t port; + evtchn_bind_interdomain_t op; + op.remote_dom = pal; + op.remote_port = remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op); + if ( rc ) + { + printk("ERROR: bind_interdomain failed with rc=%d", rc); + return rc; + } + port = op.local_port; + *local_port = bind_evtchn(port, handler, data); + return rc; +} + +int evtchn_get_peercontext(evtchn_port_t local_port, char *ctx, int size) +{ + int rc; + uint32_t sid; + struct xen_flask_op op; + op.cmd = FLASK_GET_PEER_SID; + op.interface_version = XEN_FLASK_INTERFACE_VERSION; + op.u.peersid.evtchn = local_port; + rc = HYPERVISOR_xsm_op(&op); + if (rc) + return rc; + sid = op.u.peersid.sid; + op.cmd = FLASK_SID_TO_CONTEXT; + op.u.sid_context.sid = sid; + op.u.sid_context.size = size; + set_xen_guest_handle(op.u.sid_context.context, ctx); + rc = HYPERVISOR_xsm_op(&op); + return rc; +} + + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/fbfront.c xen-4.6.5/extras/mini-os/fbfront.c --- xen-4.6.0/extras/mini-os/fbfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/fbfront.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,710 @@ +/* + * Frame Buffer + Keyboard driver for Mini-OS. + * Samuel Thibault , 2008 + * Based on blkfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_WAIT_QUEUE_HEAD(kbdfront_queue); + + + + + + +struct kbdfront_dev { + domid_t dom; + + struct xenkbd_page *page; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void kbdfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct kbdfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&kbdfront_queue); +} + +static void free_kbdfront(struct kbdfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + free_page(dev->page); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); + free(dev); +} + +struct kbdfront_dev *init_kbdfront(char *_nodename, int abs_pointer) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct xenkbd_page *s; + int retry=0; + char* msg = NULL; + char* nodename = _nodename ? 
_nodename : "device/vkbd/0"; + struct kbdfront_dev *dev; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + printk("******************* KBDFRONT for %s **********\n\n\n", nodename); + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, kbdfront_handler, dev, &dev->evtchn); + + dev->page = s = (struct xenkbd_page*) alloc_page(); + memset(s,0,PAGE_SIZE); + + dev->events = NULL; + + s->in_cons = s->in_prod = 0; + s->out_cons = s->out_prod = 0; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "page-ref","%lu", virt_to_mfn(s)); + if (err) { + message = "writing page-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + if (abs_pointer) { + err = xenbus_printf(xbt, nodename, "request-abs-pointer", "1"); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateInitialised); + if (err) { + printk("error writing initialized: %s\n", err); + free(err); + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/state") + 1]; + char frontpath[strlen(nodename) + strlen("/state") + 1]; + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + printk("%s connected\n", dev->backend); + + snprintf(frontpath, sizeof(frontpath), "%s/state", nodename); + if((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) + != NULL) { + printk("error switching state: %s\n", err); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("************************** KBDFRONT\n"); + + return dev; +error: + free(msg); + free(err); + free_kbdfront(dev); + return NULL; +} + +int kbdfront_receive(struct kbdfront_dev *dev, union xenkbd_in_event *buf, int n) +{ + struct xenkbd_page *page = dev->page; + uint32_t prod, cons; + int i; + +#ifdef HAVE_LIBC + if (dev->fd != -1) { + files[dev->fd].read = 0; + mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ + } +#endif + + prod = page->in_prod; + + if (prod == page->in_cons) + return 
0; + + rmb(); /* ensure we see ring contents up to prod */ + + for (i = 0, cons = page->in_cons; i < n && cons != prod; i++, cons++) + memcpy(buf + i, &XENKBD_IN_RING_REF(page, cons), sizeof(*buf)); + + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_evtchn(dev->evtchn); + +#ifdef HAVE_LIBC + if (cons != prod && dev->fd != -1) + /* still some events to read */ + files[dev->fd].read = 1; +#endif + + return i; +} + + +void shutdown_kbdfront(struct kbdfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/request-abs-pointer") + 1]; + + printk("close kbd: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_kbdfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_kbdfront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_kbdfront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/page-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/request-abs-pointer", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_kbdfront(dev); +} + +#ifdef HAVE_LIBC +int kbdfront_open(struct kbdfront_dev *dev) +{ + dev->fd = alloc_fd(FTYPE_KBD); + printk("kbd_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].kbd.dev = dev; + return dev->fd; +} +#endif + + + + + +DECLARE_WAIT_QUEUE_HEAD(fbfront_queue); + + + + + + +struct fbfront_dev { + domid_t dom; + + struct xenfb_page *page; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + int request_update; + + int width; + int height; + int depth; + int stride; + int mem_length; + int offset; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + +void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ +#ifdef HAVE_LIBC + struct fbfront_dev *dev = data; + int fd = dev->fd; + + if (fd != -1) + files[fd].read = 1; +#endif + wake_up(&fbfront_queue); +} + +static void free_fbfront(struct fbfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + + free(dev->backend); + + free_page(dev->page); + + unbind_evtchn(dev->evtchn); + + free(dev->nodename); 
+ free(dev); +} + +int fbfront_receive(struct fbfront_dev *dev, union xenfb_in_event *buf, int n) +{ + struct xenfb_page *page = dev->page; + uint32_t prod, cons; + int i; + +#ifdef HAVE_LIBC + if (dev->fd != -1) { + files[dev->fd].read = 0; + mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ + } +#endif + + prod = page->in_prod; + + if (prod == page->in_cons) + return 0; + + rmb(); /* ensure we see ring contents up to prod */ + + for (i = 0, cons = page->in_cons; i < n && cons != prod; i++, cons++) + memcpy(buf + i, &XENFB_IN_RING_REF(page, cons), sizeof(*buf)); + + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_evtchn(dev->evtchn); + +#ifdef HAVE_LIBC + if (cons != prod && dev->fd != -1) + /* still some events to read */ + files[dev->fd].read = 1; +#endif + + return i; +} + +struct fbfront_dev *init_fbfront(char *_nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + struct xenfb_page *s; + int retry=0; + char* msg=NULL; + int i, j; + struct fbfront_dev *dev; + int max_pd; + unsigned long mapped; + char* nodename = _nodename ? _nodename : "device/vfb/0"; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + printk("******************* FBFRONT for %s **********\n\n\n", nodename); + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, fbfront_handler, dev, &dev->evtchn); + + dev->page = s = (struct xenfb_page*) alloc_page(); + memset(s,0,PAGE_SIZE); + + s->in_cons = s->in_prod = 0; + s->out_cons = s->out_prod = 0; + dev->width = s->width = width; + dev->height = s->height = height; + dev->depth = s->depth = depth; + dev->stride = s->line_length = stride; + dev->mem_length = s->mem_length = n * PAGE_SIZE; + dev->offset = 0; + dev->events = NULL; + + max_pd = sizeof(s->pd) / sizeof(s->pd[0]); + mapped = 0; + + for (i = 0; mapped < n && i < max_pd; i++) { + unsigned long *pd = (unsigned long *) alloc_page(); + for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++) + pd[j] = mfns[mapped++]; + for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++) + pd[j] = 0; + s->pd[i] = virt_to_mfn(pd); + } + for ( ; i < max_pd; i++) + s->pd[i] = 0; + + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "page-ref","%lu", virt_to_mfn(s)); + if (err) { + message = "writing page-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "feature-update", "1"); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateInitialised); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; 
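/* This is the standard Mini-OS xenbus transaction shape, also used by the
 * blkfront, consfront and kbdfront initialisation above:
 * xenbus_transaction_start(), a batch of xenbus_printf() writes, then
 * xenbus_transaction_end(xbt, 0, &retry). If another writer raced with us,
 * the commit fails with 'retry' set and the whole batch is replayed from the
 * 'again' label; if any individual write fails, control jumps below to end
 * the transaction with abort=1 so that none of the writes become visible. */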
+ +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/request-update") + 1]; + char frontpath[strlen(nodename) + strlen("/state") + 1]; + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + printk("%s connected\n", dev->backend); + + snprintf(path, sizeof(path), "%s/request-update", dev->backend); + dev->request_update = xenbus_read_integer(path); + + snprintf(frontpath, sizeof(frontpath), "%s/state", nodename); + if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) + != NULL) { + printk("error switching state: %s\n", err); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("************************** FBFRONT\n"); + + return dev; + +error: + free(msg); + free(err); + free_fbfront(dev); + return NULL; +} + +static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event *event) +{ + struct xenfb_page *page = dev->page; + uint32_t prod; + DEFINE_WAIT(w); + + add_waiter(w, fbfront_queue); + while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN) + schedule(); + remove_waiter(w, fbfront_queue); + + prod = page->out_prod; + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(page, prod) = *event; + wmb(); /* ensure ring contents visible */ + page->out_prod = prod + 1; + notify_remote_via_evtchn(dev->evtchn); +} + +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height) +{ + struct xenfb_update update; + + if (dev->request_update <= 0) + return; + + if (x < 0) { + width += x; + x = 0; + } + if (x + width > dev->width) + width = dev->width - x; + + if (y < 0) { + height += y; + y = 0; + } + if (y + height > dev->height) + height = dev->height - y; + + if (width <= 0 || height <= 0) + return; + + update.type = XENFB_TYPE_UPDATE; + update.x = x; + update.y = y; + update.width = width; + update.height = height; + fbfront_out_event(dev, (union xenfb_out_event *) &update); +} + +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset) +{ + struct xenfb_resize resize; + + resize.type = XENFB_TYPE_RESIZE; + dev->width = resize.width = width; + dev->height = resize.height = height; + dev->stride = resize.stride = stride; + dev->depth = resize.depth = depth; + dev->offset = resize.offset = offset; + fbfront_out_event(dev, (union xenfb_out_event *) &resize); +} + +void shutdown_fbfront(struct fbfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/feature-update") + 1]; + + printk("close fb: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), 
"%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + if (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_fbfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_fbfront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_fbfront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/page-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/protocol", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/feature-update", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_fbfront(dev); +} + +#ifdef HAVE_LIBC +int fbfront_open(struct fbfront_dev *dev) +{ + dev->fd = alloc_fd(FTYPE_FB); + printk("fb_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].fb.dev = dev; + return dev->fd; +} +#endif + diff -Nru xen-4.6.0/extras/mini-os/gntmap.c xen-4.6.5/extras/mini-os/gntmap.c --- xen-4.6.0/extras/mini-os/gntmap.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/gntmap.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,250 @@ +/* + * Manages grant mappings from other domains. + * + * Diego Ongaro , July 2008 + * + * Files of type FTYPE_GNTMAP contain a gntmap, which is an array of + * (host address, grant handle) pairs. Grant handles come from a hypervisor map + * operation and are needed for the corresponding unmap. + * + * This is a rather naive implementation in terms of performance. If we start + * using it frequently, there's definitely some low-hanging fruit here. + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +//#define GNTMAP_DEBUG +#ifdef GNTMAP_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(gntmap.c:%d): %s" _f "\n", __LINE__, __func__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + + +#define DEFAULT_MAX_GRANTS 128 + +struct gntmap_entry { + unsigned long host_addr; + grant_handle_t handle; +}; + +static inline int +gntmap_entry_used(struct gntmap_entry *entry) +{ + return entry->host_addr != 0; +} + +static struct gntmap_entry* +gntmap_find_free_entry(struct gntmap *map) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (!gntmap_entry_used(&map->entries[i])) + return &map->entries[i]; + } + + DEBUG("(map=%p): all %d entries full", + map, map->nentries); + return NULL; +} + +static struct gntmap_entry* +gntmap_find_entry(struct gntmap *map, unsigned long addr) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (map->entries[i].host_addr == addr) + return &map->entries[i]; + } + return NULL; +} + +int +gntmap_set_max_grants(struct gntmap *map, int count) +{ + DEBUG("(map=%p, count=%d)", map, count); + + if (map->nentries != 0) + return -EBUSY; + + map->entries = xmalloc_array(struct gntmap_entry, count); + if (map->entries == NULL) + return -ENOMEM; + + memset(map->entries, 0, sizeof(struct gntmap_entry) * count); + map->nentries = count; + return 0; +} + +static int +_gntmap_map_grant_ref(struct gntmap_entry *entry, + unsigned long host_addr, + uint32_t domid, + uint32_t ref, + int writable) +{ + struct gnttab_map_grant_ref op; + int rc; + + op.ref = (grant_ref_t) ref; + op.dom = (domid_t) domid; + op.host_addr = (uint64_t) host_addr; + op.flags = GNTMAP_host_map; + if (!writable) + op.flags |= GNTMAP_readonly; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_map_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? rc : op.status; + } + + entry->host_addr = host_addr; + entry->handle = op.handle; + return 0; +} + +static int +_gntmap_unmap_grant_ref(struct gntmap_entry *entry) +{ + struct gnttab_unmap_grant_ref op; + int rc; + + op.host_addr = (uint64_t) entry->host_addr; + op.dev_bus_addr = 0; + op.handle = entry->handle; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_unmap_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? 
rc : op.status; + } + + entry->host_addr = 0; + return 0; +} + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count) +{ + int i, rc; + struct gntmap_entry *ent; + + DEBUG("(map=%p, start_address=%lx, count=%d)", + map, start_address, count); + + for (i = 0; i < count; i++) { + ent = gntmap_find_entry(map, start_address + PAGE_SIZE * i); + if (ent == NULL) { + printk("gntmap: tried to munmap unknown page\n"); + return -EINVAL; + } + + rc = _gntmap_unmap_grant_ref(ent); + if (rc != 0) + return rc; + } + + return 0; +} + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable) +{ + unsigned long addr; + struct gntmap_entry *ent; + int i; + + DEBUG("(map=%p, count=%" PRIu32 ", " + "domids=%p [%" PRIu32 "...], domids_stride=%d, " + "refs=%p [%" PRIu32 "...], writable=%d)", + map, count, + domids, domids == NULL ? 0 : domids[0], domids_stride, + refs, refs == NULL ? 0 : refs[0], writable); + + (void) gntmap_set_max_grants(map, DEFAULT_MAX_GRANTS); + + addr = allocate_ondemand((unsigned long) count, 1); + if (addr == 0) + return NULL; + + for (i = 0; i < count; i++) { + ent = gntmap_find_free_entry(map); + if (ent == NULL || + _gntmap_map_grant_ref(ent, + addr + PAGE_SIZE * i, + domids[i * domids_stride], + refs[i], + writable) != 0) { + + (void) gntmap_munmap(map, addr, i); + return NULL; + } + } + + return (void*) addr; +} + +void +gntmap_init(struct gntmap *map) +{ + DEBUG("(map=%p)", map); + map->nentries = 0; + map->entries = NULL; +} + +void +gntmap_fini(struct gntmap *map) +{ + struct gntmap_entry *ent; + int i; + + DEBUG("(map=%p)", map); + + for (i = 0; i < map->nentries; i++) { + ent = &map->entries[i]; + if (gntmap_entry_used(ent)) + (void) _gntmap_unmap_grant_ref(ent); + } + + xfree(map->entries); + map->entries = NULL; + map->nentries = 0; +} diff -Nru xen-4.6.0/extras/mini-os/gnttab.c xen-4.6.5/extras/mini-os/gnttab.c --- xen-4.6.0/extras/mini-os/gnttab.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/gnttab.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,196 @@ +/* + **************************************************************************** + * (C) 2006 - Cambridge University + **************************************************************************** + * + * File: gnttab.c + * Author: Steven Smith (sos22@cam.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: July 2006 + * + * Environment: Xen Minimal OS + * Description: Simple grant tables implementation. About as stupid as it's + * possible to be and still work. 
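 * A minimal usage sketch (illustrative only; assumes init_gnttab() has
 * already run and backend_domid names the peer domain):
 *
 *     grant_ref_t ref = gnttab_grant_access(backend_domid,
 *                                           virt_to_mfn(page), 0);
 *     ...advertise ref to the peer, e.g. via xenstore...
 *     while (!gnttab_end_access(ref))
 *         ;    spin until the peer has dropped its mapping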
+ * + **************************************************************************** + */ +#include +#include +#include +#include + +#define NR_RESERVED_ENTRIES 8 + +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ +#define NR_GRANT_FRAMES 4 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) + +static grant_entry_t *gnttab_table; +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +#ifdef GNT_DEBUG +static char inuse[NR_GRANT_ENTRIES]; +#endif +static __DECLARE_SEMAPHORE_GENERIC(gnttab_sem, 0); + +static void +put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + local_irq_save(flags); +#ifdef GNT_DEBUG + BUG_ON(!inuse[ref]); + inuse[ref] = 0; +#endif + gnttab_list[ref] = gnttab_list[0]; + gnttab_list[0] = ref; + local_irq_restore(flags); + up(&gnttab_sem); +} + +static grant_ref_t +get_free_entry(void) +{ + unsigned int ref; + unsigned long flags; + down(&gnttab_sem); + local_irq_save(flags); + ref = gnttab_list[0]; + BUG_ON(ref < NR_RESERVED_ENTRIES || ref >= NR_GRANT_ENTRIES); + gnttab_list[0] = gnttab_list[ref]; +#ifdef GNT_DEBUG + BUG_ON(inuse[ref]); + inuse[ref] = 1; +#endif + local_irq_restore(flags); + return ref; +} + +grant_ref_t +gnttab_grant_access(domid_t domid, unsigned long frame, int readonly) +{ + grant_ref_t ref; + + ref = get_free_entry(); + gnttab_table[ref].frame = frame; + gnttab_table[ref].domid = domid; + wmb(); + readonly *= GTF_readonly; + gnttab_table[ref].flags = GTF_permit_access | readonly; + + return ref; +} + +grant_ref_t +gnttab_grant_transfer(domid_t domid, unsigned long pfn) +{ + grant_ref_t ref; + + ref = get_free_entry(); + gnttab_table[ref].frame = pfn; + gnttab_table[ref].domid = domid; + wmb(); + gnttab_table[ref].flags = GTF_accept_transfer; + + return ref; +} + +int +gnttab_end_access(grant_ref_t ref) +{ + uint16_t flags, nflags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + + nflags = gnttab_table[ref].flags; + do { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { + printk("WARNING: g.e. still in use! (%x)\n", flags); + return 0; + } + } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) != + flags); + + put_free_entry(ref); + return 1; +} + +unsigned long +gnttab_end_transfer(grant_ref_t ref) +{ + unsigned long frame; + uint16_t flags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + + while (!((flags = gnttab_table[ref].flags) & GTF_transfer_committed)) { + if (synch_cmpxchg(&gnttab_table[ref].flags, flags, 0) == flags) { + printk("Release unused transfer grant.\n"); + put_free_entry(ref); + return 0; + } + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = gnttab_table[ref].flags; + } + + /* Read the frame number /after/ reading completion status. 
*/ + rmb(); + frame = gnttab_table[ref].frame; + + put_free_entry(ref); + + return frame; +} + +grant_ref_t +gnttab_alloc_and_grant(void **map) +{ + unsigned long mfn; + grant_ref_t gref; + + *map = (void *)alloc_page(); + mfn = virt_to_mfn(*map); + gref = gnttab_grant_access(0, mfn, 0); + return gref; +} + +static const char * const gnttabop_error_msgs[] = GNTTABOP_error_msgs; + +const char * +gnttabop_error(int16_t status) +{ + status = -status; + if (status < 0 || status >= ARRAY_SIZE(gnttabop_error_msgs)) + return "bad status"; + else + return gnttabop_error_msgs[status]; +} + +void +init_gnttab(void) +{ + int i; + +#ifdef GNT_DEBUG + memset(inuse, 1, sizeof(inuse)); +#endif + for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) + put_free_entry(i); + + gnttab_table = arch_init_gnttab(NR_GRANT_FRAMES); + printk("gnttab_table mapped at %p.\n", gnttab_table); +} + +void +fini_gnttab(void) +{ + struct gnttab_setup_table setup; + + setup.dom = DOMID_SELF; + setup.nr_frames = 0; + + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); +} diff -Nru xen-4.6.0/extras/mini-os/hypervisor.c xen-4.6.5/extras/mini-os/hypervisor.c --- xen-4.6.0/extras/mini-os/hypervisor.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/hypervisor.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,132 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002-2003, K A Fraser + * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk,Intel Research Cambridge + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) + +int in_callback; + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long l1, l2, l1i, l2i; + unsigned int port; + int cpu = 0; + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + in_callback = 1; + + vcpu_info->evtchn_upcall_pending = 0; + /* NB x86. No need for a barrier here -- XCHG is a barrier on x86. */ +#if !defined(__i386__) && !defined(__x86_64__) + /* Clear master flag /before/ clearing selector flag. 
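 * Clearing them in the opposite order would leave a window in which an
 * incoming event sets the selector just before the master flag is
 * cleared, stranding the event with no further upcall to deliver it.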
*/ + wmb(); +#endif + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + while ( l1 != 0 ) + { + l1i = __ffs(l1); + l1 &= ~(1UL << l1i); + + while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 ) + { + l2i = __ffs(l2); + l2 &= ~(1UL << l2i); + + port = (l1i * (sizeof(unsigned long) * 8)) + l2i; + do_event(port, regs); + } + } + + in_callback = 0; +} + +void force_evtchn_callback(void) +{ +#ifdef XEN_HAVE_PV_UPCALL_MASK + int save; +#endif + vcpu_info_t *vcpu; + vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; +#ifdef XEN_HAVE_PV_UPCALL_MASK + save = vcpu->evtchn_upcall_mask; +#endif + + while (vcpu->evtchn_upcall_pending) { +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = 1; +#endif + barrier(); + do_hypervisor_callback(NULL); + barrier(); +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = save; + barrier(); +#endif + }; +} + +inline void mask_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} + +inline void unmask_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *vcpu_info = &s->vcpu_info[smp_processor_id()]; + + synch_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just like + * a real IO-APIC we 'lose the interrupt edge' if the channel is masked. + */ + if ( synch_test_bit (port, &s->evtchn_pending[0]) && + !synch_test_and_set_bit(port / (sizeof(unsigned long) * 8), + &vcpu_info->evtchn_pending_sel) ) + { + vcpu_info->evtchn_upcall_pending = 1; +#ifdef XEN_HAVE_PV_UPCALL_MASK + if ( !vcpu_info->evtchn_upcall_mask ) +#endif + force_evtchn_callback(); + } +} + +inline void clear_evtchn(uint32_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} diff -Nru xen-4.6.0/extras/mini-os/include/arch/cc.h xen-4.6.5/extras/mini-os/include/arch/cc.h --- xen-4.6.0/extras/mini-os/include/arch/cc.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arch/cc.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,87 @@ +/* + * lwip/arch/cc.h + * + * Compiler-specific types and macros for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_CC_H__ +#define __LWIP_ARCH_CC_H__ + +/* Typedefs for the types used by lwip - */ +#include +#include +#include +typedef uint8_t u8_t; +typedef int8_t s8_t; +typedef uint16_t u16_t; +typedef int16_t s16_t; +typedef uint32_t u32_t; +typedef int32_t s32_t; +typedef uint64_t u64_t; +typedef int64_t s64_t; +typedef uintptr_t mem_ptr_t; + +typedef uint16_t u_short; + +/* Compiler hints for packing lwip's structures - */ +#define PACK_STRUCT_FIELD(_x) _x +#define PACK_STRUCT_STRUCT __attribute__ ((packed)) +#define PACK_STRUCT_BEGIN +#define PACK_STRUCT_END + +/* Platform specific diagnostic output - */ + +extern void lwip_printk(char *fmt, ...); +#define LWIP_PLATFORM_DIAG(_x) do { lwip_printk _x ; } while (0) + +extern void lwip_die(char *fmt, ...); +#define LWIP_PLATFORM_ASSERT(_x) do { lwip_die(_x); } while(0) + +/* "lightweight" synchronization mechanisms - */ +/* SYS_ARCH_DECL_PROTECT(x) - declare a protection state variable. */ +/* SYS_ARCH_PROTECT(x) - enter protection mode. */ +/* SYS_ARCH_UNPROTECT(x) - leave protection mode. */ + +/* If the compiler does not provide memset() this file must include a */ +/* definition of it, or include a file which defines it. 
*/ +#include + +/* This file must either include a system-local which defines */ +/* the standard *nix error codes, or it should #define LWIP_PROVIDE_ERRNO */ +/* to make lwip/arch.h define the codes which are used throughout. */ +#include + +/* Not required by the docs, but needed for network-order calculations */ +#ifdef HAVE_LIBC +#include +#ifndef BIG_ENDIAN +#error endian.h does not define byte order +#endif +#else +#include +#endif + +#include +#define S16_F PRIi16 +#define U16_F PRIu16 +#define X16_F PRIx16 +#define S32_F PRIi32 +#define U32_F PRIu32 +#define X32_F PRIx32 + +#if 0 +#ifndef DBG_ON +#define DBG_ON LWIP_DBG_ON +#endif +#define LWIP_DEBUG DBG_ON +//#define IP_DEBUG DBG_ON +#define TCP_DEBUG DBG_ON +#define TCP_INPUT_DEBUG DBG_ON +#define TCP_QLEN_DEBUG DBG_ON +#define TCPIP_DEBUG DBG_ON +#define DBG_TYPES_ON DBG_ON +#endif + +#endif /* __LWIP_ARCH_CC_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/arch/perf.h xen-4.6.5/extras/mini-os/include/arch/perf.h --- xen-4.6.0/extras/mini-os/include/arch/perf.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arch/perf.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,15 @@ +/* + * lwip/arch/perf.h + * + * Arch-specific performance measurement for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_PERF_H__ +#define __LWIP_ARCH_PERF_H__ + +#define PERF_START do { } while(0) +#define PERF_STOP(_x) do { (void)(_x); } while (0) + +#endif /* __LWIP_ARCH_PERF_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/arch/sys_arch.h xen-4.6.5/extras/mini-os/include/arch/sys_arch.h --- xen-4.6.0/extras/mini-os/include/arch/sys_arch.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arch/sys_arch.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * lwip/arch/sys_arch.h + * + * Arch-specific semaphores and mailboxes for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_ARCH_SYS_ARCH_H__ +#define __LWIP_ARCH_SYS_ARCH_H__ + +#include +#include +#include + +typedef struct semaphore *sys_sem_t; +#define SYS_SEM_NULL ((sys_sem_t) NULL) + +struct mbox { + int count; + void **messages; + struct semaphore read_sem; + struct semaphore write_sem; + int writer; + int reader; +}; + +typedef struct mbox *sys_mbox_t; +#define SYS_MBOX_NULL ((sys_mbox_t) 0) + +typedef struct thread *sys_thread_t; + +typedef unsigned long sys_prot_t; + +#endif /*__LWIP_ARCH_SYS_ARCH_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/arm/arch_endian.h xen-4.6.5/extras/mini-os/include/arm/arch_endian.h --- xen-4.6.0/extras/mini-os/include/arm/arch_endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arch_endian.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef ARCH_ENDIAN_H +#error "Do not include arch_endian by itself, include endian.h" +#else + +#define __BYTE_ORDER __LITTLE_ENDIAN + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/arm/arch_limits.h xen-4.6.5/extras/mini-os/include/arm/arch_limits.h --- xen-4.6.0/extras/mini-os/include/arm/arch_limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arch_limits.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,9 @@ +#ifndef __ARCH_LIMITS_H__ +#define __ARCH_LIMITS_H__ + +#include + +#define __STACK_SIZE_PAGE_ORDER 2 +#define __STACK_SIZE (4 * PAGE_SIZE) + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/arm/arch_mm.h xen-4.6.5/extras/mini-os/include/arm/arch_mm.h --- xen-4.6.0/extras/mini-os/include/arm/arch_mm.h 1970-01-01 
00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arch_mm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,38 @@ +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +typedef uint64_t paddr_t; + +extern char _text, _etext, _erodata, _edata, _end, __bss_start; +extern int _boot_stack[]; +extern int _boot_stack_end[]; +extern uint32_t physical_address_offset; /* Add this to a virtual address to get the physical address (wraps at 4GB) */ + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define L1_PAGETABLE_SHIFT 12 + +#define to_phys(x) (((paddr_t)(x)+physical_address_offset) & 0xffffffff) +#define to_virt(x) ((void *)(((x)-physical_address_offset) & 0xffffffff)) + +#define PFN_UP(x) (unsigned long)(((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) (unsigned long)((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((uint64_t)(x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) (unsigned long)((x) >> L1_PAGETABLE_SHIFT) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(PFN_PHYS(_mfn))) +#define pfn_to_virt(_pfn) (to_virt(PFN_PHYS(_pfn))) + +#define mfn_to_pfn(x) (x) +#define pfn_to_mfn(x) (x) + +#define virtual_to_mfn(_virt) virt_to_mfn(_virt) + +// FIXME +#define map_frames(f, n) (NULL) + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/arm/arch_sched.h xen-4.6.5/extras/mini-os/include/arm/arch_sched.h --- xen-4.6.0/extras/mini-os/include/arm/arch_sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arch_sched.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,19 @@ +#ifndef __ARCH_SCHED_H__ +#define __ARCH_SCHED_H__ + +#include "arch_limits.h" + +static inline struct thread* get_current(void) +{ + struct thread **current; + unsigned long sp; + __asm__ __volatile__ ("mov %0, sp":"=r"(sp)); + current = (void *)(unsigned long)(sp & ~(__STACK_SIZE-1)); + return *current; +} + +void __arch_switch_threads(unsigned long *prevctx, unsigned long *nextctx); + +#define arch_switch_threads(prev,next) __arch_switch_threads(&(prev)->sp, &(next)->sp) + +#endif /* __ARCH_SCHED_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/arm/arch_spinlock.h xen-4.6.5/extras/mini-os/include/arm/arch_spinlock.h --- xen-4.6.0/extras/mini-os/include/arm/arch_spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arch_spinlock.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,36 @@ +#ifndef __ARCH_ASM_SPINLOCK_H +#define __ARCH_ASM_SPINLOCK_H + +#include "os.h" + +#define ARCH_SPIN_LOCK_UNLOCKED { 1 } + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define arch_spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + xchg(&lock->slock, 1); +} + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + return xchg(&lock->slock, 0) != 0 ? 1 : 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ + volatile int was_locked; + do { + was_locked = xchg(&lock->slock, 0) == 0 ? 
1 : 0; + } while(was_locked); +} + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/arm/arm32/arch_wordsize.h xen-4.6.5/extras/mini-os/include/arm/arm32/arch_wordsize.h --- xen-4.6.0/extras/mini-os/include/arm/arm32/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/arm32/arch_wordsize.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1 @@ +#define __WORDSIZE 32 diff -Nru xen-4.6.0/extras/mini-os/include/arm/gic.h xen-4.6.5/extras/mini-os/include/arm/gic.h --- xen-4.6.0/extras/mini-os/include/arm/gic.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/gic.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1 @@ +void gic_init(void); diff -Nru xen-4.6.0/extras/mini-os/include/arm/hypercall-arm.h xen-4.6.5/extras/mini-os/include/arm/hypercall-arm.h --- xen-4.6.0/extras/mini-os/include/arm/hypercall-arm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/hypercall-arm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,98 @@ +/****************************************************************************** + * hypercall-arm.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#ifndef __HYPERCALL_ARM_H__
+#define __HYPERCALL_ARM_H__
+
+#include
+#include
+#include
+#include
+
+int
+HYPERVISOR_sched_op(
+    int cmd, void *arg);
+
+static inline int
+HYPERVISOR_shutdown(
+    unsigned int reason)
+{
+    struct sched_shutdown shutdown = { .reason = reason };
+    return HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
+}
+
+int
+HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg);
+
+int
+HYPERVISOR_event_channel_op(
+    int cmd, void *op);
+
+int
+HYPERVISOR_xen_version(
+    int cmd, void *arg);
+
+int
+HYPERVISOR_console_io(
+    int cmd, int count, char *str);
+
+int
+HYPERVISOR_physdev_op(
+    void *physdev_op);
+
+int
+HYPERVISOR_grant_table_op(
+    unsigned int cmd, void *uop, unsigned int count);
+
+int
+HYPERVISOR_vcpu_op(
+    int cmd, int vcpuid, void *extra_args);
+
+int
+HYPERVISOR_sysctl(
+    unsigned long op);
+
+int
+HYPERVISOR_domctl(
+    unsigned long op);
+
+int
+HYPERVISOR_hvm_op(
+    unsigned long op, void *arg);
+
+int
+HYPERVISOR_xsm_op(
+    struct xen_flask_op *);
+
+#endif /* __HYPERCALL_ARM_H__ */
diff -Nru xen-4.6.0/extras/mini-os/include/arm/os.h xen-4.6.5/extras/mini-os/include/arm/os.h
--- xen-4.6.0/extras/mini-os/include/arm/os.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/arm/os.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,216 @@
+#ifndef _OS_H_
+#define _OS_H_
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+#include
+#include
+#include
+
+void arch_fini(void);
+void timer_handler(evtchn_port_t port, struct pt_regs *regs, void *ign);
+
+extern void *device_tree;
+
+#define BUG() while(1){asm volatile (".word 0xe7f000f0\n");} /* Undefined instruction; will call our fault handler. */
+
+#define smp_processor_id() 0
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+extern shared_info_t *HYPERVISOR_shared_info;
+
+// disable interrupts
+static inline void local_irq_disable(void) {
+    __asm__ __volatile__("cpsid i":::"memory");
+}
+
+// enable interrupts
+static inline void local_irq_enable(void) {
+    __asm__ __volatile__("cpsie i":::"memory");
+}
+
+#define local_irq_save(x) { \
+    __asm__ __volatile__("mrs %0, cpsr;cpsid i":"=r"(x)::"memory"); \
+}
+
+#define local_irq_restore(x) { \
+    __asm__ __volatile__("msr cpsr_c, %0"::"r"(x):"memory"); \
+}
+
+#define local_save_flags(x) { \
+    __asm__ __volatile__("mrs %0, cpsr":"=r"(x)::"memory"); \
+}
+
+static inline int irqs_disabled(void) {
+    int x;
+    local_save_flags(x);
+    return x & 0x80;
+}
+
+/* We probably only need "dmb" here, but we'll start by being paranoid. */
+#define mb() __asm__("dsb":::"memory");
+#define rmb() __asm__("dsb":::"memory");
+#define wmb() __asm__("dsb":::"memory");
+
+/************************** arm *******************************/
+#ifdef __INSIDE_MINIOS__
+#if defined (__arm__)
+#define xchg(ptr,v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ *
+ * This operation is atomic.
+ * If you need a memory barrier, use synch_test_and_clear_bit instead.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+    uint8_t *byte = ((uint8_t *)addr) + (nr >> 3);
+    uint8_t bit = 1 << (nr & 7);
+    uint8_t orig;
+
+    orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_RELAXED);
+
+    return (orig & bit) != 0;
+}
+
+/**
+ * Atomically set a bit and return the old value.
+ * Similar to test_and_clear_bit.
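 * For example, test_and_set_bit(9, addr) targets byte addr[9 >> 3] =
 * addr[1] with mask 1 << (9 & 7) = 0x02, and reports whether that bit
 * was already set.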
+ */ +static __inline__ int test_and_set_bit(int nr, volatile void *base) +{ + uint8_t *byte = ((uint8_t *)base) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_or(byte, bit, __ATOMIC_RELAXED); + + return (orig & bit) != 0; +} + +/** + * Test whether a bit is set. */ +static __inline__ int test_bit(int nr, const volatile unsigned long *addr) +{ + const uint8_t *ptr = (const uint8_t *) addr; + return ((1 << (nr & 7)) & (ptr[nr >> 3])) != 0; +} + +/** + * Atomically set a bit in memory (like test_and_set_bit but discards result). + */ +static __inline__ void set_bit(int nr, volatile unsigned long *addr) +{ + test_and_set_bit(nr, addr); +} + +/** + * Atomically clear a bit in memory (like test_and_clear_bit but discards result). + */ +static __inline__ void clear_bit(int nr, volatile unsigned long *addr) +{ + test_and_clear_bit(nr, addr); +} + +/** + * __ffs - find first (lowest) set bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long __ffs(unsigned long word) +{ + int clz; + + /* xxxxx10000 = word + * xxxxx01111 = word - 1 + * 0000011111 = word ^ (word - 1) + * 4 = 31 - clz(word ^ (word - 1)) + */ + + __asm__ ( + "sub r0, %[word], #1\n" + "eor r0, r0, %[word]\n" + "clz %[clz], r0\n": + /* Outputs: */ + [clz] "=r"(clz): + /* Inputs: */ + [word] "r"(word): + /* Clobbers: */ + "r0"); + + return 31 - clz; +} + +#else /* ifdef __arm__ */ +#error "Unsupported architecture" +#endif +#endif /* ifdef __INSIDE_MINIOS */ + +/********************* common arm32 and arm64 ****************************/ + +/* If *ptr == old, then store new there (and return new). + * Otherwise, return the old value. + * Atomic. */ +#define synch_cmpxchg(ptr, old, new) \ +({ __typeof__(*ptr) stored = old; \ + __atomic_compare_exchange_n(ptr, &stored, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? new : old; \ +}) + +/* As test_and_clear_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ int synch_test_and_clear_bit(int nr, volatile void *addr) +{ + uint8_t *byte = ((uint8_t *)addr) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As test_and_set_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ int synch_test_and_set_bit(int nr, volatile void *base) +{ + uint8_t *byte = ((uint8_t *)base) + (nr >> 3); + uint8_t bit = 1 << (nr & 7); + uint8_t orig; + + orig = __atomic_fetch_or(byte, bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As set_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ void synch_set_bit(int nr, volatile void *addr) +{ + synch_test_and_set_bit(nr, addr); +} + +/* As clear_bit, but using __ATOMIC_SEQ_CST */ +static __inline__ void synch_clear_bit(int nr, volatile void *addr) +{ + synch_test_and_clear_bit(nr, addr); +} + +/* As test_bit, but with a following memory barrier. 
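 * The barrier() only constrains the compiler: it keeps the flag read
 * from being cached or reordered against later accesses, e.g. when
 * polling a bit that another vcpu flips.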
*/ +static __inline__ int synch_test_bit(int nr, volatile void *addr) +{ + int result; + result = test_bit(nr, addr); + barrier(); + return result; +} + +#endif /* not assembly */ + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/arm/traps.h xen-4.6.5/extras/mini-os/include/arm/traps.h --- xen-4.6.0/extras/mini-os/include/arm/traps.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/arm/traps.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,20 @@ +#ifndef _TRAPS_H_ +#define _TRAPS_H_ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; +}; + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/blkfront.h xen-4.6.5/extras/mini-os/include/blkfront.h --- xen-4.6.0/extras/mini-os/include/blkfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/blkfront.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,54 @@ +#include +#include +#include +struct blkfront_dev; +struct blkfront_aiocb +{ + struct blkfront_dev *aio_dev; + uint8_t *aio_buf; + size_t aio_nbytes; + off_t aio_offset; + size_t total_bytes; + uint8_t is_write; + void *data; + + grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int n; + + void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret); +}; +struct blkfront_info +{ + uint64_t sectors; + unsigned sector_size; + int mode; + int info; + int barrier; + int flush; +}; +struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info); +#ifdef HAVE_LIBC +#include +/* POSIX IO functions: + * use blkfront_open() to get a file descriptor to the block device + * Don't use the other blkfront posix functions here directly, instead use + * read(), write(), lseek() and fstat() on the file descriptor + */ +int blkfront_open(struct blkfront_dev *dev); +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write); +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1) +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0) +int blkfront_posix_fstat(int fd, struct stat* buf); +#endif +void blkfront_aio(struct blkfront_aiocb *aiocbp, int write); +#define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0) +#define blkfront_aio_write(aiocbp) blkfront_aio(aiocbp, 1) +void blkfront_io(struct blkfront_aiocb *aiocbp, int write); +#define blkfront_read(aiocbp) blkfront_io(aiocbp, 0) +#define blkfront_write(aiocbp) blkfront_io(aiocbp, 1) +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op); +int blkfront_aio_poll(struct blkfront_dev *dev); +void blkfront_sync(struct blkfront_dev *dev); +void shutdown_blkfront(struct blkfront_dev *dev); + +extern struct wait_queue_head blkfront_queue; diff -Nru xen-4.6.0/extras/mini-os/include/byteorder.h xen-4.6.5/extras/mini-os/include/byteorder.h --- xen-4.6.0/extras/mini-os/include/byteorder.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/byteorder.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,36 @@ +#ifndef MINIOS_BYTEORDER_H +#define MINIOS_BYTEORDER_H + +#include +#include + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define be16_to_cpu(v) bswap_16(v) +#define be32_to_cpu(v) bswap_32(v) +#define be64_to_cpu(v) bswap_64(v) + +#define le16_to_cpu(v) (v) +#define le32_to_cpu(v) (v) +#define le64_to_cpu(v) (v) + +#else /*__BIG_ENDIAN*/ +#define 
be16_to_cpu(v) (v) +#define be32_to_cpu(v) (v) +#define be64_to_cpu(v) (v) + +#define le16_to_cpu(v) bswap_16(v) +#define le32_to_cpu(v) bswap_32(v) +#define le64_to_cpu(v) bswap_64(v) + +#endif + +#define cpu_to_be16(v) be16_to_cpu(v) +#define cpu_to_be32(v) be32_to_cpu(v) +#define cpu_to_be64(v) be64_to_cpu(v) + +#define cpu_to_le16(v) le16_to_cpu(v) +#define cpu_to_le32(v) le32_to_cpu(v) +#define cpu_to_le64(v) le64_to_cpu(v) + + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/byteswap.h xen-4.6.5/extras/mini-os/include/byteswap.h --- xen-4.6.0/extras/mini-os/include/byteswap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/byteswap.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,39 @@ +#ifndef _BYTESWAP_H_ +#define _BYTESWAP_H_ + +/* Unfortunately not provided by newlib. */ + +#include + +#define bswap_16(x) ((uint16_t)( \ + (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) | \ + (((uint16_t)(x) & (uint16_t)0xff00U) >> 8))) + +/* Use gcc optimized versions if they exist */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#define bswap_32(v) __builtin_bswap32(v) +#define bswap_64(v) __builtin_bswap64(v) +#else + +#define bswap_32(x) ((uint32_t)( \ + (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24))) + +#define bswap_64(x) ((uint64_t)( \ + (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56))) + +#endif + + + + +#endif /* _BYTESWAP_H */ diff -Nru xen-4.6.0/extras/mini-os/include/compiler.h xen-4.6.5/extras/mini-os/include/compiler.h --- xen-4.6.0/extras/mini-os/include/compiler.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/compiler.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,10 @@ +#ifndef __MINIOS_COMPILER_H_ +#define __MINIOS_COMPILER_H_ + +#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 +#define __builtin_expect(x, expected_value) (x) +#endif +#define unlikely(x) __builtin_expect(!!(x),0) +#define likely(x) __builtin_expect(!!(x),1) + +#endif /* __MINIOS_COMPILER_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/console.h xen-4.6.5/extras/mini-os/include/console.h --- xen-4.6.0/extras/mini-os/include/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/console.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,89 @@ +/* + **************************************************************************** + * (C) 2006 - Grzegorz Milos - Cambridge University + **************************************************************************** + * + * File: console.h + * Author: Grzegorz Milos + * Changes: + * + * Date: Mar 2006 + * + * Environment: Xen Minimal OS + * Description: Console interface. + * + * Handles console I/O. Defines printk. 
+ * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef _LIB_CONSOLE_H_ +#define _LIB_CONSOLE_H_ + +#include +#include +#include +#include +#include +#include +#include + +struct consfront_dev { + domid_t dom; + + struct xencons_interface *ring; + grant_ref_t ring_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; +#endif +}; + + + +void print(int direct, const char *fmt, va_list args); +void printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +void xprintk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +#define tprintk(_fmt, _args...) printk("[%s] " _fmt, current->name, ##_args) + +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs); +void xencons_tx(void); + +void init_console(void); +void console_print(struct consfront_dev *dev, char *data, int length); +void fini_console(struct consfront_dev *dev); + +/* Low level functions defined in xencons_ring.c */ +extern struct wait_queue_head console_queue; +struct consfront_dev *xencons_ring_init(void); +struct consfront_dev *init_consfront(char *_nodename); +int xencons_ring_send(struct consfront_dev *dev, const char *data, unsigned len); +int xencons_ring_send_no_notify(struct consfront_dev *dev, const char *data, unsigned len); +int xencons_ring_avail(struct consfront_dev *dev); +int xencons_ring_recv(struct consfront_dev *dev, char *data, unsigned len); +void free_consfront(struct consfront_dev *dev); + +#endif /* _LIB_CONSOLE_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/ctype.h xen-4.6.5/extras/mini-os/include/ctype.h --- xen-4.6.0/extras/mini-os/include/ctype.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/ctype.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,60 @@ +#ifndef _CTYPE_H +#define _CTYPE_H + +#ifdef HAVE_LIBC +#include_next +#else +/* + * NOTE! This ctype does not handle EOF like the standard C + * library is required to. 
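 * (EOF, i.e. -1, gets masked to 0xff by __ismask and is classified like
 * an ordinary character, rather than receiving the well-defined zero
 * result ISO C specifies for EOF.)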
+ */ + +#define _U 0x01 /* upper */ +#define _L 0x02 /* lower */ +#define _D 0x04 /* digit */ +#define _C 0x08 /* cntrl */ +#define _P 0x10 /* punct */ +#define _S 0x20 /* white space (space/lf/tab) */ +#define _X 0x40 /* hex digit */ +#define _SP 0x80 /* hard space (0x20) */ + + +extern unsigned char _ctype[]; + +#define __ismask(x) (_ctype[(int)(unsigned char)(x)]) + +#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) +#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) +#define iscntrl(c) ((__ismask(c)&(_C)) != 0) +#define isdigit(c) ((__ismask(c)&(_D)) != 0) +#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) +#define islower(c) ((__ismask(c)&(_L)) != 0) +#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) +#define ispunct(c) ((__ismask(c)&(_P)) != 0) +#define isspace(c) ((__ismask(c)&(_S)) != 0) +#define isupper(c) ((__ismask(c)&(_U)) != 0) +#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define toascii(c) (((unsigned char)(c))&0x7f) + +static inline unsigned char __tolower(unsigned char c) +{ + if (isupper(c)) + c -= 'A'-'a'; + return c; +} + +static inline unsigned char __toupper(unsigned char c) +{ + if (islower(c)) + c -= 'a'-'A'; + return c; +} + +#define tolower(c) __tolower(c) +#define toupper(c) __toupper(c) + +#endif + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/endian.h xen-4.6.5/extras/mini-os/include/endian.h --- xen-4.6.0/extras/mini-os/include/endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/endian.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,19 @@ +#ifndef _ENDIAN_H_ +#define _ENDIAN_H_ + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __PDP_ENDIAN 3412 + +#define ARCH_ENDIAN_H +/* This will define __BYTE_ORDER for the current arch */ +#include +#undef ARCH_ENDIAN_H + +#include + +#define BYTE_ORDER __BYTE_ORDER +#define BIG_ENDIAN __BIG_ENDIAN +#define LITTLE_ENDIAN __LITTLE_ENDIAN + +#endif /* endian.h */ diff -Nru xen-4.6.0/extras/mini-os/include/err.h xen-4.6.5/extras/mini-os/include/err.h --- xen-4.6.0/extras/mini-os/include/err.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/err.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,31 @@ +#ifndef _ERR_H +#define _ERR_H + +#include + +/* + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a dentry + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. 
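 *
 * For example (a sketch of the calling convention; lookup() is a
 * hypothetical helper that fails with ERR_PTR(-ENOENT)):
 *
 *     void *obj = lookup(name);
 *     if (IS_ERR(obj))
 *         return PTR_ERR(obj);    propagates -ENOENT as a plain long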
+ */ +#define IS_ERR_VALUE(x) ((x) > (unsigned long)-1000L) + +static inline void *ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +#endif /* _LINUX_ERR_H */ diff -Nru xen-4.6.0/extras/mini-os/include/errno-base.h xen-4.6.5/extras/mini-os/include/errno-base.h --- xen-4.6.0/extras/mini-os/include/errno-base.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/errno-base.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,39 @@ +#ifndef _ERRNO_BASE_H +#define _ERRNO_BASE_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/errno.h xen-4.6.5/extras/mini-os/include/errno.h --- xen-4.6.0/extras/mini-os/include/errno.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/errno.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,122 @@ +#ifndef _ERRNO_H +#define _ERRNO_H + +#include + +typedef int error_t; + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ 
+#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define ENOTSUP EOPNOTSUPP +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded 
*/ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + + +#define EFTYPE 132 /* Inappropriate file type or format */ + +#ifdef HAVE_LIBC +#include +extern int errno; +#define ERRNO +#define errno (get_current()->reent._errno) +#endif + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/events.h xen-4.6.5/extras/mini-os/include/events.h --- xen-4.6.0/extras/mini-os/include/events.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/events.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,59 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Reseach Cambridge + **************************************************************************** + * + * File: events.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with events on the event channels + * + **************************************************************************** + */ + +#ifndef _EVENTS_H_ +#define _EVENTS_H_ + +#include +#include + +typedef void (*evtchn_handler_t)(evtchn_port_t, struct pt_regs *, void *); + +/* prototypes */ +void arch_init_events(void); + +/* Called by fini_events to close any ports opened by arch-specific code. 
*/ +void arch_unbind_ports(void); + +void arch_fini_events(void); + +int do_event(evtchn_port_t port, struct pt_regs *regs); +evtchn_port_t bind_virq(uint32_t virq, evtchn_handler_t handler, void *data); +evtchn_port_t bind_pirq(uint32_t pirq, int will_share, evtchn_handler_t handler, void *data); +evtchn_port_t bind_evtchn(evtchn_port_t port, evtchn_handler_t handler, + void *data); +void unbind_evtchn(evtchn_port_t port); +void init_events(void); +int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler, + void *data, evtchn_port_t *port); +int evtchn_bind_interdomain(domid_t pal, evtchn_port_t remote_port, + evtchn_handler_t handler, void *data, + evtchn_port_t *local_port); +int evtchn_get_peercontext(evtchn_port_t local_port, char *ctx, int size); +void unbind_all_ports(void); + +static inline int notify_remote_via_evtchn(evtchn_port_t port) +{ + evtchn_send_t op; + op.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op); +} + +void fini_events(void); + +#endif /* _EVENTS_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/fbfront.h xen-4.6.5/extras/mini-os/include/fbfront.h --- xen-4.6.0/extras/mini-os/include/fbfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/fbfront.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,46 @@ +#include +#include +#include +#include + +/* from */ +#ifndef BTN_LEFT +#define BTN_LEFT 0x110 +#endif +#ifndef BTN_RIGHT +#define BTN_RIGHT 0x111 +#endif +#ifndef BTN_MIDDLE +#define BTN_MIDDLE 0x112 +#endif +#ifndef KEY_Q +#define KEY_Q 16 +#endif +#ifndef KEY_MAX +#define KEY_MAX 0x1ff +#endif + + +struct kbdfront_dev; +struct kbdfront_dev *init_kbdfront(char *nodename, int abs_pointer); +#ifdef HAVE_LIBC +int kbdfront_open(struct kbdfront_dev *dev); +#endif + +int kbdfront_receive(struct kbdfront_dev *dev, union xenkbd_in_event *buf, int n); +extern struct wait_queue_head kbdfront_queue; + +void shutdown_kbdfront(struct kbdfront_dev *dev); + + +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n); +#ifdef HAVE_LIBC +int fbfront_open(struct fbfront_dev *dev); +#endif + +int fbfront_receive(struct fbfront_dev *dev, union xenfb_in_event *buf, int n); +extern struct wait_queue_head fbfront_queue; +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height); +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset); + +void shutdown_fbfront(struct fbfront_dev *dev); diff -Nru xen-4.6.0/extras/mini-os/include/fcntl.h xen-4.6.5/extras/mini-os/include/fcntl.h --- xen-4.6.0/extras/mini-os/include/fcntl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/fcntl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,99 @@ +#ifndef _I386_FCNTL_H +#define _I386_FCNTL_H + +#ifdef HAVE_LIBC +#include_next +#else + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW 
0400000 /* don't follow links */ +#define O_NOATIME 01000000 + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ + +/* +struct flock { + short l_type; + short l_whence; + off_t l_start; + off_t l_len; + pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; +}; + +#define F_LINUX_SPECIFIC_BASE 1024 +*/ + +#endif + +int open(const char *path, int flags, ...) asm("open64"); +int fcntl(int fd, int cmd, ...); + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/gntmap.h xen-4.6.5/extras/mini-os/include/gntmap.h --- xen-4.6.0/extras/mini-os/include/gntmap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/gntmap.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,35 @@ +#ifndef __GNTMAP_H__ +#define __GNTMAP_H__ + +#include + +/* + * Please consider struct gntmap opaque. If instead you choose to disregard + * this message, I insist that you keep an eye out for raptors. 
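+ *
+ * A typical lifetime, sketched only from the declarations below (the
+ * count n and the domids/refs arrays are illustrative, not prescribed):
+ *
+ *   struct gntmap map;
+ *   gntmap_init(&map);
+ *   void *addr = gntmap_map_grant_refs(&map, n, domids, 1, refs, 1);
+ *   ... access the n mapped frames at addr, then ...
+ *   gntmap_munmap(&map, (unsigned long)addr, n);
+ *   gntmap_fini(&map);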
+ */ +struct gntmap { + int nentries; + struct gntmap_entry *entries; +}; + +int +gntmap_set_max_grants(struct gntmap *map, int count); + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count); + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable); + +void +gntmap_init(struct gntmap *map); + +void +gntmap_fini(struct gntmap *map); + +#endif /* !__GNTMAP_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/gnttab.h xen-4.6.5/extras/mini-os/include/gnttab.h --- xen-4.6.0/extras/mini-os/include/gnttab.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/gnttab.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,17 @@ +#ifndef __GNTTAB_H__ +#define __GNTTAB_H__ + +#include + +void init_gnttab(void); +grant_ref_t gnttab_alloc_and_grant(void **map); +grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, + int readonly); +grant_ref_t gnttab_grant_transfer(domid_t domid, unsigned long pfn); +unsigned long gnttab_end_transfer(grant_ref_t gref); +int gnttab_end_access(grant_ref_t ref); +const char *gnttabop_error(int16_t status); +void fini_gnttab(void); +grant_entry_t *arch_init_gnttab(int nr_grant_frames); + +#endif /* !__GNTTAB_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/hypervisor.h xen-4.6.5/extras/mini-os/include/hypervisor.h --- xen-4.6.0/extras/mini-os/include/hypervisor.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/hypervisor.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,48 @@ +/****************************************************************************** + * hypervisor.h + * + * Hypervisor handling. + * + * + * Copyright (c) 2002, K A Fraser + * Copyright (c) 2005, Grzegorz Milos + * Updates: Aravindh Puthiyaparambil + */ + +#ifndef _HYPERVISOR_H_ +#define _HYPERVISOR_H_ + +#include +#include +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include +#else +#error "Unsupported architecture" +#endif +#include + +/* + * a placeholder for the start of day information passed up from the hypervisor + */ +union start_info_union +{ + start_info_t start_info; + char padding[512]; +}; +extern union start_info_union start_info_union; +#define start_info (start_info_union.start_info) + +/* hypervisor.c */ +void force_evtchn_callback(void); +void do_hypervisor_callback(struct pt_regs *regs); +void mask_evtchn(uint32_t port); +void unmask_evtchn(uint32_t port); +void clear_evtchn(uint32_t port); + +extern int in_callback; + +#endif /* __HYPERVISOR_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/ioremap.h xen-4.6.5/extras/mini-os/include/ioremap.h --- xen-4.6.0/extras/mini-os/include/ioremap.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/ioremap.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2009 Netronome Systems, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _IOREMAP_H_ +#define _IOREMAP_H_ + +void *ioremap(unsigned long phys_addr, unsigned long size); +void *ioremap_nocache(unsigned long phys_addr, unsigned long size); +void iounmap(void *virt_addr, unsigned long size); + +#endif /* _IOREMAP_H_ */ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */ diff -Nru xen-4.6.0/extras/mini-os/include/iorw.h xen-4.6.5/extras/mini-os/include/iorw.h --- xen-4.6.0/extras/mini-os/include/iorw.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/iorw.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef MINIOS_IORW_H +#define MINIOS_IORW_H + +#include + +void iowrite8(volatile void* addr, uint8_t val); +void iowrite16(volatile void* addr, uint16_t val); +void iowrite32(volatile void* addr, uint32_t val); +void iowrite64(volatile void* addr, uint64_t val); + +uint8_t ioread8(volatile void* addr); +uint16_t ioread16(volatile void* addr); +uint32_t ioread32(volatile void* addr); +uint64_t ioread64(volatile void* addr); + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/kernel.h xen-4.6.5/extras/mini-os/include/kernel.h --- xen-4.6.0/extras/mini-os/include/kernel.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/kernel.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,9 @@ +#ifndef _KERNEL_H_ +#define _KERNEL_H_ + +void start_kernel(void); +void do_exit(void) __attribute__((noreturn)); +void arch_do_exit(void); +void stop_kernel(void); + +#endif /* _KERNEL_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/lib-gpl.h xen-4.6.5/extras/mini-os/include/lib-gpl.h --- xen-4.6.0/extras/mini-os/include/lib-gpl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/lib-gpl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,59 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, from Linux' + * include/linux/kernel.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free 
Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _LIB_GPL_H_ +#define _LIB_GPL_H_ + +#ifndef HAVE_LIBC +/* printing */ +extern unsigned long simple_strtoul(const char *,char **,unsigned int); +extern long simple_strtol(const char *,char **,unsigned int); +extern unsigned long long simple_strtoull(const char *,char **,unsigned int); +extern long long simple_strtoll(const char *,char **,unsigned int); + +extern int sprintf(char * buf, const char * fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int vsprintf(char *buf, const char *, va_list) + __attribute__ ((format (printf, 2, 0))); +extern int snprintf(char * buf, size_t size, const char * fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) + __attribute__ ((format (printf, 3, 0))); +extern int scnprintf(char * buf, size_t size, const char * fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) + __attribute__ ((format (printf, 3, 0))); +extern int sscanf(const char *, const char *, ...) + __attribute__ ((format (scanf, 2, 3))); +extern int vsscanf(const char *, const char *, va_list) + __attribute__ ((format (scanf, 2, 0))); +#endif + +#endif /* _LIB_GPL_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/lib.h xen-4.6.5/extras/mini-os/include/lib.h --- xen-4.6.0/extras/mini-os/include/lib.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/lib.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,230 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, contains some freebsd stuff + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _LIB_H_ +#define _LIB_H_ + +#include +#include +#include +#include +#include "gntmap.h" + +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#define BUILD_BUG_ON(cond) ({ _Static_assert(!(cond), "!(" #cond ")"); }) +#define BUILD_BUG_ON_ZERO(cond) \ + sizeof(struct { _Static_assert(!(cond), "!(" #cond ")"); }) +#else +#define BUILD_BUG_ON_ZERO(cond) sizeof(struct { int:-!!(cond); }) +#define BUILD_BUG_ON(cond) ((void)BUILD_BUG_ON_ZERO(cond)) +#endif + +#ifdef HAVE_LIBC +#include +#include +#else +#include +#endif + +#ifdef HAVE_LIBC +#include +#else +/* string and memory manipulation */ + +/* + * From: + * @(#)libkern.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ +int memcmp(const void *b1, const void *b2, size_t len); + +char *strcat(char * __restrict, const char * __restrict); +int strcmp(const char *, const char *); +char *strcpy(char * __restrict, const char * __restrict); + +char *strdup(const char *__restrict); + +size_t strlen(const char *); + +int strncmp(const char *, const char *, size_t); +char *strncpy(char * __restrict, const char * __restrict, size_t); + +char *strstr(const char *, const char *); + +void *memset(void *, int, size_t); + +char *strchr(const char *p, int ch); +char *strrchr(const char *p, int ch); + +/* From: + * @(#)systm.h 8.7 (Berkeley) 3/29/95 + * $FreeBSD$ + */ +void *memcpy(void *to, const void *from, size_t len); + +size_t strnlen(const char *, size_t); +#endif + +#include + +#define RAND_MIX 2654435769U + +int rand(void); + +#include + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk("ASSERTION FAILED: %s at %s:%d.\n", \ + # x , \ + __FILE__, \ + __LINE__); \ + BUG(); \ + } \ +} while(0) + +#define BUG_ON(x) ASSERT(!(x)) + +/* Consistency check as much as possible. 
*/ +void sanity_check(void); + +#ifdef HAVE_LIBC +enum fd_type { + FTYPE_NONE = 0, + FTYPE_CONSOLE, + FTYPE_FILE, + FTYPE_XENBUS, + FTYPE_XC, + FTYPE_EVTCHN, + FTYPE_GNTMAP, + FTYPE_SOCKET, + FTYPE_TAP, + FTYPE_BLK, + FTYPE_KBD, + FTYPE_FB, + FTYPE_MEM, + FTYPE_SAVEFILE, + FTYPE_TPMFRONT, + FTYPE_TPM_TIS, +}; + +LIST_HEAD(evtchn_port_list, evtchn_port_info); + +struct evtchn_port_info { + LIST_ENTRY(evtchn_port_info) list; + evtchn_port_t port; + unsigned long pending; + int bound; +}; + +extern struct file { + enum fd_type type; + union { + struct { + /* lwIP fd */ + int fd; + } socket; + struct { + /* FS import fd */ + int fd; + off_t offset; + } file; + struct { + struct evtchn_port_list ports; + } evtchn; + struct gntmap gntmap; + struct { + struct netfront_dev *dev; + } tap; + struct { + struct blkfront_dev *dev; + off_t offset; + } blk; + struct { + struct kbdfront_dev *dev; + } kbd; + struct { + struct fbfront_dev *dev; + } fb; + struct { + struct consfront_dev *dev; + } cons; +#ifdef CONFIG_TPMFRONT + struct { + struct tpmfront_dev *dev; + int respgot; + off_t offset; + } tpmfront; +#endif +#ifdef CONFIG_TPM_TIS + struct { + struct tpm_chip *dev; + int respgot; + off_t offset; + } tpm_tis; +#endif +#ifdef CONFIG_XENBUS + struct { + /* To each xenbus FD is associated a queue of watch events for this + * FD. */ + xenbus_event_queue events; + } xenbus; +#endif + }; + int read; /* maybe available for read */ +} files[]; + +int alloc_fd(enum fd_type type); +void close_all_files(void); +extern struct thread *main_thread; +void sparse(unsigned long data, size_t size); +#endif + +#endif /* _LIB_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/linux/types.h xen-4.6.5/extras/mini-os/include/linux/types.h --- xen-4.6.0/extras/mini-os/include/linux/types.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/linux/types.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,5 @@ +#ifndef _LINUX_TYPES_H_ +#define _LINUX_TYPES_H_ +#include +typedef uint64_t __u64; +#endif /* _LINUX_TYPES_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/lwipopts.h xen-4.6.5/extras/mini-os/include/lwipopts.h --- xen-4.6.0/extras/mini-os/include/lwipopts.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/lwipopts.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,23 @@ +/* + * lwipopts.h + * + * Configuration for lwIP running on mini-os + * + * Tim Deegan , July 2007 + */ + +#ifndef __LWIP_LWIPOPTS_H__ +#define __LWIP_LWIPOPTS_H__ + +#define SYS_LIGHTWEIGHT_PROT 1 +#define MEM_LIBC_MALLOC 1 +#define LWIP_TIMEVAL_PRIVATE 0 +#define LWIP_DHCP 1 +#define LWIP_COMPAT_SOCKETS 0 +#define LWIP_IGMP 1 +#define LWIP_USE_HEAP_FROM_INTERRUPT 1 +#define MEMP_NUM_SYS_TIMEOUT 10 +#define TCP_SND_BUF 3000 +#define TCP_MSS 1500 + +#endif /* __LWIP_LWIPOPTS_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/minios-external/bsd-COPYRIGHT xen-4.6.5/extras/mini-os/include/minios-external/bsd-COPYRIGHT --- xen-4.6.0/extras/mini-os/include/minios-external/bsd-COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/minios-external/bsd-COPYRIGHT 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,126 @@ +# $FreeBSD$ +# @(#)COPYRIGHT 8.2 (Berkeley) 3/21/94 + +The compilation of software known as FreeBSD is distributed under the +following terms: + +Copyright (c) 1992-2011 The FreeBSD Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +The 4.4BSD and 4.4BSD-Lite software is distributed under the following +terms: + +All of the documentation and software included in the 4.4BSD and 4.4BSD-Lite +Releases is copyrighted by The Regents of the University of California. + +Copyright 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: +This product includes software developed by the University of +California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +The Institute of Electrical and Electronics Engineers and the American +National Standards Committee X3, on Information Processing Systems have +given us permission to reprint portions of their documentation. + +In the following statement, the phrase ``this text'' refers to portions +of the system documentation. 
+ +Portions of this text are reprinted and reproduced in electronic form in +the second BSD Networking Software Release, from IEEE Std 1003.1-1988, IEEE +Standard Portable Operating System Interface for Computer Environments +(POSIX), copyright C 1988 by the Institute of Electrical and Electronics +Engineers, Inc. In the event of any discrepancy between these versions +and the original IEEE Standard, the original IEEE Standard is the referee +document. + +In the following statement, the phrase ``This material'' refers to portions +of the system documentation. + +This material is reproduced with permission from American National +Standards Committee X3, on Information Processing Systems. Computer and +Business Equipment Manufacturers Association (CBEMA), 311 First St., NW, +Suite 500, Washington, DC 20001-2178. The developmental work of +Programming Language C was completed by the X3J11 Technical Committee. + +The views and conclusions contained in the software and documentation are +those of the authors and should not be interpreted as representing official +policies, either expressed or implied, of the Regents of the University +of California. + + +NOTE: The copyright of UC Berkeley's Berkeley Software Distribution ("BSD") +source has been updated. The copyright addendum may be found at +ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change and is +included below. + +July 22, 1999 + +To All Licensees, Distributors of Any Version of BSD: + +As you know, certain of the Berkeley Software Distribution ("BSD") source +code files require that further distributions of products containing all or +portions of the software, acknowledge within their advertising materials +that such products contain software developed by UC Berkeley and its +contributors. + +Specifically, the provision reads: + +" * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors." + +Effective immediately, licensees and distributors are no longer required to +include the acknowledgement within advertising materials. Accordingly, the +foregoing paragraph of those BSD Unix files containing it is hereby deleted +in its entirety. + +William Hoskins +Director, Office of Technology Licensing +University of California, Berkeley diff -Nru xen-4.6.0/extras/mini-os/include/minios-external/bsd-queue.3 xen-4.6.5/extras/mini-os/include/minios-external/bsd-queue.3 --- xen-4.6.0/extras/mini-os/include/minios-external/bsd-queue.3 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/minios-external/bsd-queue.3 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,1044 @@ +.\" Copyright (c) 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)queue.3 8.2 (Berkeley) 1/24/94 +.\" $FreeBSD$ +.\" +.Dd May 13, 2011 +.Dt QUEUE 3 +.Os +.Sh NAME +.Nm SLIST_EMPTY , +.Nm SLIST_ENTRY , +.Nm SLIST_FIRST , +.Nm SLIST_FOREACH , +.Nm SLIST_FOREACH_SAFE , +.Nm SLIST_HEAD , +.Nm SLIST_HEAD_INITIALIZER , +.Nm SLIST_INIT , +.Nm SLIST_INSERT_AFTER , +.Nm SLIST_INSERT_HEAD , +.Nm SLIST_NEXT , +.Nm SLIST_REMOVE_AFTER , +.Nm SLIST_REMOVE_HEAD , +.Nm SLIST_REMOVE , +.Nm SLIST_SWAP , +.Nm STAILQ_CONCAT , +.Nm STAILQ_EMPTY , +.Nm STAILQ_ENTRY , +.Nm STAILQ_FIRST , +.Nm STAILQ_FOREACH , +.Nm STAILQ_FOREACH_SAFE , +.Nm STAILQ_HEAD , +.Nm STAILQ_HEAD_INITIALIZER , +.Nm STAILQ_INIT , +.Nm STAILQ_INSERT_AFTER , +.Nm STAILQ_INSERT_HEAD , +.Nm STAILQ_INSERT_TAIL , +.Nm STAILQ_LAST , +.Nm STAILQ_NEXT , +.Nm STAILQ_REMOVE_AFTER , +.Nm STAILQ_REMOVE_HEAD , +.Nm STAILQ_REMOVE , +.Nm STAILQ_SWAP , +.Nm LIST_EMPTY , +.Nm LIST_ENTRY , +.Nm LIST_FIRST , +.Nm LIST_FOREACH , +.Nm LIST_FOREACH_SAFE , +.Nm LIST_HEAD , +.Nm LIST_HEAD_INITIALIZER , +.Nm LIST_INIT , +.Nm LIST_INSERT_AFTER , +.Nm LIST_INSERT_BEFORE , +.Nm LIST_INSERT_HEAD , +.Nm LIST_NEXT , +.Nm LIST_REMOVE , +.Nm LIST_SWAP , +.Nm TAILQ_CONCAT , +.Nm TAILQ_EMPTY , +.Nm TAILQ_ENTRY , +.Nm TAILQ_FIRST , +.Nm TAILQ_FOREACH , +.Nm TAILQ_FOREACH_SAFE , +.Nm TAILQ_FOREACH_REVERSE , +.Nm TAILQ_FOREACH_REVERSE_SAFE , +.Nm TAILQ_HEAD , +.Nm TAILQ_HEAD_INITIALIZER , +.Nm TAILQ_INIT , +.Nm TAILQ_INSERT_AFTER , +.Nm TAILQ_INSERT_BEFORE , +.Nm TAILQ_INSERT_HEAD , +.Nm TAILQ_INSERT_TAIL , +.Nm TAILQ_LAST , +.Nm TAILQ_NEXT , +.Nm TAILQ_PREV , +.Nm TAILQ_REMOVE , +.Nm TAILQ_SWAP +.Nd implementations of singly-linked lists, singly-linked tail queues, +lists and tail queues +.Sh SYNOPSIS +.In sys/queue.h +.\" +.Fn SLIST_EMPTY "SLIST_HEAD *head" +.Fn SLIST_ENTRY "TYPE" +.Fn SLIST_FIRST "SLIST_HEAD *head" +.Fn SLIST_FOREACH "TYPE *var" "SLIST_HEAD *head" "SLIST_ENTRY NAME" +.Fn SLIST_FOREACH_SAFE "TYPE *var" "SLIST_HEAD *head" "SLIST_ENTRY NAME" "TYPE *temp_var" +.Fn SLIST_HEAD "HEADNAME" "TYPE" +.Fn SLIST_HEAD_INITIALIZER "SLIST_HEAD head" +.Fn SLIST_INIT "SLIST_HEAD *head" +.Fn SLIST_INSERT_AFTER "TYPE *listelm" "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_INSERT_HEAD "SLIST_HEAD *head" "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_NEXT "TYPE *elm" "SLIST_ENTRY NAME" +.Fn SLIST_REMOVE_AFTER "TYPE *elm" 
"SLIST_ENTRY NAME" +.Fn SLIST_REMOVE_HEAD "SLIST_HEAD *head" "SLIST_ENTRY NAME" +.Fn SLIST_REMOVE "SLIST_HEAD *head" "TYPE *elm" "TYPE" "SLIST_ENTRY NAME" +.Fn SLIST_SWAP "SLIST_HEAD *head1" "SLIST_HEAD *head2" "SLIST_ENTRY NAME" +.\" +.Fn STAILQ_CONCAT "STAILQ_HEAD *head1" "STAILQ_HEAD *head2" +.Fn STAILQ_EMPTY "STAILQ_HEAD *head" +.Fn STAILQ_ENTRY "TYPE" +.Fn STAILQ_FIRST "STAILQ_HEAD *head" +.Fn STAILQ_FOREACH "TYPE *var" "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" +.Fn STAILQ_FOREACH_SAFE "TYPE *var" "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn STAILQ_HEAD "HEADNAME" "TYPE" +.Fn STAILQ_HEAD_INITIALIZER "STAILQ_HEAD head" +.Fn STAILQ_INIT "STAILQ_HEAD *head" +.Fn STAILQ_INSERT_AFTER "STAILQ_HEAD *head" "TYPE *listelm" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_INSERT_HEAD "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_INSERT_TAIL "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_LAST "STAILQ_HEAD *head" "TYPE" "STAILQ_ENTRY NAME" +.Fn STAILQ_NEXT "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE_AFTER "STAILQ_HEAD *head" "TYPE *elm" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE_HEAD "STAILQ_HEAD *head" "STAILQ_ENTRY NAME" +.Fn STAILQ_REMOVE "STAILQ_HEAD *head" "TYPE *elm" "TYPE" "STAILQ_ENTRY NAME" +.Fn STAILQ_SWAP "STAILQ_HEAD *head1" "STAILQ_HEAD *head2" "STAILQ_ENTRY NAME" +.\" +.Fn LIST_EMPTY "LIST_HEAD *head" +.Fn LIST_ENTRY "TYPE" +.Fn LIST_FIRST "LIST_HEAD *head" +.Fn LIST_FOREACH "TYPE *var" "LIST_HEAD *head" "LIST_ENTRY NAME" +.Fn LIST_FOREACH_SAFE "TYPE *var" "LIST_HEAD *head" "LIST_ENTRY NAME" "TYPE *temp_var" +.Fn LIST_HEAD "HEADNAME" "TYPE" +.Fn LIST_HEAD_INITIALIZER "LIST_HEAD head" +.Fn LIST_INIT "LIST_HEAD *head" +.Fn LIST_INSERT_AFTER "TYPE *listelm" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_INSERT_BEFORE "TYPE *listelm" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_INSERT_HEAD "LIST_HEAD *head" "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_NEXT "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_REMOVE "TYPE *elm" "LIST_ENTRY NAME" +.Fn LIST_SWAP "LIST_HEAD *head1" "LIST_HEAD *head2" "TYPE" "LIST_ENTRY NAME" +.\" +.Fn TAILQ_CONCAT "TAILQ_HEAD *head1" "TAILQ_HEAD *head2" "TAILQ_ENTRY NAME" +.Fn TAILQ_EMPTY "TAILQ_HEAD *head" +.Fn TAILQ_ENTRY "TYPE" +.Fn TAILQ_FIRST "TAILQ_HEAD *head" +.Fn TAILQ_FOREACH "TYPE *var" "TAILQ_HEAD *head" "TAILQ_ENTRY NAME" +.Fn TAILQ_FOREACH_SAFE "TYPE *var" "TAILQ_HEAD *head" "TAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn TAILQ_FOREACH_REVERSE "TYPE *var" "TAILQ_HEAD *head" "HEADNAME" "TAILQ_ENTRY NAME" +.Fn TAILQ_FOREACH_REVERSE_SAFE "TYPE *var" "TAILQ_HEAD *head" "HEADNAME" "TAILQ_ENTRY NAME" "TYPE *temp_var" +.Fn TAILQ_HEAD "HEADNAME" "TYPE" +.Fn TAILQ_HEAD_INITIALIZER "TAILQ_HEAD head" +.Fn TAILQ_INIT "TAILQ_HEAD *head" +.Fn TAILQ_INSERT_AFTER "TAILQ_HEAD *head" "TYPE *listelm" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_BEFORE "TYPE *listelm" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_HEAD "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_INSERT_TAIL "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_LAST "TAILQ_HEAD *head" "HEADNAME" +.Fn TAILQ_NEXT "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_PREV "TYPE *elm" "HEADNAME" "TAILQ_ENTRY NAME" +.Fn TAILQ_REMOVE "TAILQ_HEAD *head" "TYPE *elm" "TAILQ_ENTRY NAME" +.Fn TAILQ_SWAP "TAILQ_HEAD *head1" "TAILQ_HEAD *head2" "TYPE" "TAILQ_ENTRY NAME" +.\" +.Sh DESCRIPTION +These macros define and operate on four types of data structures: +singly-linked lists, singly-linked tail queues, lists, and tail queues. 
+All four structures support the following functionality:
+.Bl -enum -compact -offset indent
+.It
+Insertion of a new entry at the head of the list.
+.It
+Insertion of a new entry after any element in the list.
+.It
+O(1) removal of an entry from the head of the list.
+.It
+Forward traversal through the list.
+.It
+Swapping the contents of two lists.
+.El
+.Pp
+Singly-linked lists are the simplest of the four data structures
+and support only the above functionality.
+Singly-linked lists are ideal for applications with large datasets
+and few or no removals,
+or for implementing a LIFO queue.
+Singly-linked lists add the following functionality:
+.Bl -enum -compact -offset indent
+.It
+O(n) removal of any entry in the list.
+.El
+.Pp
+Singly-linked tail queues add the following functionality:
+.Bl -enum -compact -offset indent
+.It
+Entries can be added at the end of a list.
+.It
+O(n) removal of any entry in the list.
+.It
+They may be concatenated.
+.El
+However:
+.Bl -enum -compact -offset indent
+.It
+All list insertions must specify the head of the list.
+.It
+Each head entry requires two pointers rather than one.
+.It
+Code size is about 15% greater and operations run about 20% slower
+than singly-linked lists.
+.El
+.Pp
+Singly-linked tailqs are ideal for applications with large datasets and
+few or no removals,
+or for implementing a FIFO queue.
+.Pp
+All doubly linked types of data structures (lists and tail queues)
+additionally allow:
+.Bl -enum -compact -offset indent
+.It
+Insertion of a new entry before any element in the list.
+.It
+O(1) removal of any entry in the list.
+.El
+However:
+.Bl -enum -compact -offset indent
+.It
+Each element requires two pointers rather than one.
+.It
+Code size and execution time of operations (except for removal) is about
+twice that of the singly-linked data-structures.
+.El
+.Pp
+Linked lists are the simplest of the doubly linked data structures and support
+only the above functionality over singly-linked lists.
+.Pp
+Tail queues add the following functionality:
+.Bl -enum -compact -offset indent
+.It
+Entries can be added at the end of a list.
+.It
+They may be traversed backwards, from tail to head.
+.It
+They may be concatenated.
+.El
+However:
+.Bl -enum -compact -offset indent
+.It
+All list insertions and removals must specify the head of the list.
+.It
+Each head entry requires two pointers rather than one.
+.It
+Code size is about 15% greater and operations run about 20% slower
+than singly-linked lists.
+.El
+.Pp
+In the macro definitions,
+.Fa TYPE
+is the name of a user defined structure,
+that must contain a field of type
+.Li SLIST_ENTRY ,
+.Li STAILQ_ENTRY ,
+.Li LIST_ENTRY ,
+or
+.Li TAILQ_ENTRY ,
+named
+.Fa NAME .
+The argument
+.Fa HEADNAME
+is the name of a user defined structure that must be declared
+using the macros
+.Li SLIST_HEAD ,
+.Li STAILQ_HEAD ,
+.Li LIST_HEAD ,
+or
+.Li TAILQ_HEAD .
+See the examples below for further explanation of how these
+macros are used.
+.Sh SINGLY-LINKED LISTS
+A singly-linked list is headed by a structure defined by the
+.Nm SLIST_HEAD
+macro.
+This structure contains a single pointer to the first element
+on the list.
+The elements are singly linked for minimum space and pointer manipulation
+overhead at the expense of O(n) removal for arbitrary elements.
+New elements can be added to the list after an existing element or
+at the head of the list.
+An +.Fa SLIST_HEAD +structure is declared as follows: +.Bd -literal -offset indent +SLIST_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Fa HEADNAME +is the name of the structure to be defined, and +.Fa TYPE +is the type of the elements to be linked into the list. +A pointer to the head of the list can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm SLIST_HEAD_INITIALIZER +evaluates to an initializer for the list +.Fa head . +.Pp +The macro +.Nm SLIST_EMPTY +evaluates to true if there are no elements in the list. +.Pp +The macro +.Nm SLIST_ENTRY +declares a structure that connects the elements in +the list. +.Pp +The macro +.Nm SLIST_FIRST +returns the first element in the list or NULL if the list is empty. +.Pp +The macro +.Nm SLIST_FOREACH +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in +turn to +.Fa var . +.Pp +The macro +.Nm SLIST_FOREACH_SAFE +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in +turn to +.Fa var . +However, unlike +.Fn SLIST_FOREACH +here it is permitted to both remove +.Fa var +as well as free it from within the loop safely without interfering with the +traversal. +.Pp +The macro +.Nm SLIST_INIT +initializes the list referenced by +.Fa head . +.Pp +The macro +.Nm SLIST_INSERT_HEAD +inserts the new element +.Fa elm +at the head of the list. +.Pp +The macro +.Nm SLIST_INSERT_AFTER +inserts the new element +.Fa elm +after the element +.Fa listelm . +.Pp +The macro +.Nm SLIST_NEXT +returns the next element in the list. +.Pp +The macro +.Nm SLIST_REMOVE_AFTER +removes the element after +.Fa elm +from the list. Unlike +.Fa SLIST_REMOVE , +this macro does not traverse the entire list. +.Pp +The macro +.Nm SLIST_REMOVE_HEAD +removes the element +.Fa elm +from the head of the list. +For optimum efficiency, +elements being removed from the head of the list should explicitly use +this macro instead of the generic +.Fa SLIST_REMOVE +macro. +.Pp +The macro +.Nm SLIST_REMOVE +removes the element +.Fa elm +from the list. +.Pp +The macro +.Nm SLIST_SWAP +swaps the contents of +.Fa head1 +and +.Fa head2 . +.Sh SINGLY-LINKED LIST EXAMPLE +.Bd -literal +SLIST_HEAD(slisthead, entry) head = + SLIST_HEAD_INITIALIZER(head); +struct slisthead *headp; /* Singly-linked List head. */ +struct entry { + ... + SLIST_ENTRY(entry) entries; /* Singly-linked List. */ + ... +} *n1, *n2, *n3, *np; + +SLIST_INIT(&head); /* Initialize the list. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +SLIST_INSERT_HEAD(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +SLIST_INSERT_AFTER(n1, n2, entries); + +SLIST_REMOVE(&head, n2, entry, entries);/* Deletion. */ +free(n2); + +n3 = SLIST_FIRST(&head); +SLIST_REMOVE_HEAD(&head, entries); /* Deletion from the head. */ +free(n3); + /* Forward traversal. */ +SLIST_FOREACH(np, &head, entries) + np-> ... + /* Safe forward traversal. */ +SLIST_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + SLIST_REMOVE(&head, np, entry, entries); + free(np); +} + +while (!SLIST_EMPTY(&head)) { /* List Deletion. */ + n1 = SLIST_FIRST(&head); + SLIST_REMOVE_HEAD(&head, entries); + free(n1); +} +.Ed +.Sh SINGLY-LINKED TAIL QUEUES +A singly-linked tail queue is headed by a structure defined by the +.Nm STAILQ_HEAD +macro. 
+This structure contains a pair of pointers,
+one to the first element in the tail queue and the other to
+the last element in the tail queue.
+The elements are singly linked for minimum space and pointer
+manipulation overhead at the expense of O(n) removal for arbitrary
+elements.
+New elements can be added to the tail queue after an existing element,
+at the head of the tail queue, or at the end of the tail queue.
+A
+.Fa STAILQ_HEAD
+structure is declared as follows:
+.Bd -literal -offset indent
+STAILQ_HEAD(HEADNAME, TYPE) head;
+.Ed
+.Pp
+where
+.Li HEADNAME
+is the name of the structure to be defined, and
+.Li TYPE
+is the type of the elements to be linked into the tail queue.
+A pointer to the head of the tail queue can later be declared as:
+.Bd -literal -offset indent
+struct HEADNAME *headp;
+.Ed
+.Pp
+(The names
+.Li head
+and
+.Li headp
+are user selectable.)
+.Pp
+The macro
+.Nm STAILQ_HEAD_INITIALIZER
+evaluates to an initializer for the tail queue
+.Fa head .
+.Pp
+The macro
+.Nm STAILQ_CONCAT
+concatenates the tail queue headed by
+.Fa head2
+onto the end of the one headed by
+.Fa head1
+removing all entries from the former.
+.Pp
+The macro
+.Nm STAILQ_EMPTY
+evaluates to true if there are no items on the tail queue.
+.Pp
+The macro
+.Nm STAILQ_ENTRY
+declares a structure that connects the elements in
+the tail queue.
+.Pp
+The macro
+.Nm STAILQ_FIRST
+returns the first item on the tail queue or NULL if the tail queue
+is empty.
+.Pp
+The macro
+.Nm STAILQ_FOREACH
+traverses the tail queue referenced by
+.Fa head
+in the forward direction, assigning each element
+in turn to
+.Fa var .
+.Pp
+The macro
+.Nm STAILQ_FOREACH_SAFE
+traverses the tail queue referenced by
+.Fa head
+in the forward direction, assigning each element
+in turn to
+.Fa var .
+However, unlike
+.Fn STAILQ_FOREACH
+here it is permitted to both remove
+.Fa var
+as well as free it from within the loop safely without interfering with the
+traversal.
+.Pp
+The macro
+.Nm STAILQ_INIT
+initializes the tail queue referenced by
+.Fa head .
+.Pp
+The macro
+.Nm STAILQ_INSERT_HEAD
+inserts the new element
+.Fa elm
+at the head of the tail queue.
+.Pp
+The macro
+.Nm STAILQ_INSERT_TAIL
+inserts the new element
+.Fa elm
+at the end of the tail queue.
+.Pp
+The macro
+.Nm STAILQ_INSERT_AFTER
+inserts the new element
+.Fa elm
+after the element
+.Fa listelm .
+.Pp
+The macro
+.Nm STAILQ_LAST
+returns the last item on the tail queue.
+If the tail queue is empty the return value is
+.Dv NULL .
+.Pp
+The macro
+.Nm STAILQ_NEXT
+returns the next item on the tail queue, or NULL if this item is the last.
+.Pp
+The macro
+.Nm STAILQ_REMOVE_AFTER
+removes the element after
+.Fa elm
+from the tail queue. Unlike
+.Fa STAILQ_REMOVE ,
+this macro does not traverse the entire tail queue.
+.Pp
+The macro
+.Nm STAILQ_REMOVE_HEAD
+removes the element at the head of the tail queue.
+For optimum efficiency,
+elements being removed from the head of the tail queue should
+use this macro explicitly rather than the generic
+.Fa STAILQ_REMOVE
+macro.
+.Pp
+The macro
+.Nm STAILQ_REMOVE
+removes the element
+.Fa elm
+from the tail queue.
+.Pp
+The macro
+.Nm STAILQ_SWAP
+swaps the contents of
+.Fa head1
+and
+.Fa head2 .
+.Sh SINGLY-LINKED TAIL QUEUE EXAMPLE
+.Bd -literal
+STAILQ_HEAD(stailhead, entry) head =
+    STAILQ_HEAD_INITIALIZER(head);
+struct stailhead *headp;		/* Singly-linked tail queue head. */
+struct entry {
+	...
+	STAILQ_ENTRY(entry) entries;	/* Tail queue. */
+	...
+} *n1, *n2, *n3, *np; + +STAILQ_INIT(&head); /* Initialize the queue. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +STAILQ_INSERT_HEAD(&head, n1, entries); + +n1 = malloc(sizeof(struct entry)); /* Insert at the tail. */ +STAILQ_INSERT_TAIL(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +STAILQ_INSERT_AFTER(&head, n1, n2, entries); + /* Deletion. */ +STAILQ_REMOVE(&head, n2, entry, entries); +free(n2); + /* Deletion from the head. */ +n3 = STAILQ_FIRST(&head); +STAILQ_REMOVE_HEAD(&head, entries); +free(n3); + /* Forward traversal. */ +STAILQ_FOREACH(np, &head, entries) + np-> ... + /* Safe forward traversal. */ +STAILQ_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + STAILQ_REMOVE(&head, np, entry, entries); + free(np); +} + /* TailQ Deletion. */ +while (!STAILQ_EMPTY(&head)) { + n1 = STAILQ_FIRST(&head); + STAILQ_REMOVE_HEAD(&head, entries); + free(n1); +} + /* Faster TailQ Deletion. */ +n1 = STAILQ_FIRST(&head); +while (n1 != NULL) { + n2 = STAILQ_NEXT(n1, entries); + free(n1); + n1 = n2; +} +STAILQ_INIT(&head); +.Ed +.Sh LISTS +A list is headed by a structure defined by the +.Nm LIST_HEAD +macro. +This structure contains a single pointer to the first element +on the list. +The elements are doubly linked so that an arbitrary element can be +removed without traversing the list. +New elements can be added to the list after an existing element, +before an existing element, or at the head of the list. +A +.Fa LIST_HEAD +structure is declared as follows: +.Bd -literal -offset indent +LIST_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Fa HEADNAME +is the name of the structure to be defined, and +.Fa TYPE +is the type of the elements to be linked into the list. +A pointer to the head of the list can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm LIST_HEAD_INITIALIZER +evaluates to an initializer for the list +.Fa head . +.Pp +The macro +.Nm LIST_EMPTY +evaluates to true if there are no elements in the list. +.Pp +The macro +.Nm LIST_ENTRY +declares a structure that connects the elements in +the list. +.Pp +The macro +.Nm LIST_FIRST +returns the first element in the list or NULL if the list +is empty. +.Pp +The macro +.Nm LIST_FOREACH +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in turn to +.Fa var . +.Pp +The macro +.Nm LIST_FOREACH_SAFE +traverses the list referenced by +.Fa head +in the forward direction, assigning each element in turn to +.Fa var . +However, unlike +.Fn LIST_FOREACH +here it is permitted to both remove +.Fa var +as well as free it from within the loop safely without interfering with the +traversal. +.Pp +The macro +.Nm LIST_INIT +initializes the list referenced by +.Fa head . +.Pp +The macro +.Nm LIST_INSERT_HEAD +inserts the new element +.Fa elm +at the head of the list. +.Pp +The macro +.Nm LIST_INSERT_AFTER +inserts the new element +.Fa elm +after the element +.Fa listelm . +.Pp +The macro +.Nm LIST_INSERT_BEFORE +inserts the new element +.Fa elm +before the element +.Fa listelm . +.Pp +The macro +.Nm LIST_NEXT +returns the next element in the list, or NULL if this is the last. +.Pp +The macro +.Nm LIST_REMOVE +removes the element +.Fa elm +from the list. +.Pp +The macro +.Nm LIST_SWAP +swaps the contents of +.Fa head1 +and +.Fa head2 . 
+.Sh LIST EXAMPLE +.Bd -literal +LIST_HEAD(listhead, entry) head = + LIST_HEAD_INITIALIZER(head); +struct listhead *headp; /* List head. */ +struct entry { + ... + LIST_ENTRY(entry) entries; /* List. */ + ... +} *n1, *n2, *n3, *np, *np_temp; + +LIST_INIT(&head); /* Initialize the list. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +LIST_INSERT_HEAD(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +LIST_INSERT_AFTER(n1, n2, entries); + +n3 = malloc(sizeof(struct entry)); /* Insert before. */ +LIST_INSERT_BEFORE(n2, n3, entries); + +LIST_REMOVE(n2, entries); /* Deletion. */ +free(n2); + /* Forward traversal. */ +LIST_FOREACH(np, &head, entries) + np-> ... + + /* Safe forward traversal. */ +LIST_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + LIST_REMOVE(np, entries); + free(np); +} + +while (!LIST_EMPTY(&head)) { /* List Deletion. */ + n1 = LIST_FIRST(&head); + LIST_REMOVE(n1, entries); + free(n1); +} + +n1 = LIST_FIRST(&head); /* Faster List Deletion. */ +while (n1 != NULL) { + n2 = LIST_NEXT(n1, entries); + free(n1); + n1 = n2; +} +LIST_INIT(&head); +.Ed +.Sh TAIL QUEUES +A tail queue is headed by a structure defined by the +.Nm TAILQ_HEAD +macro. +This structure contains a pair of pointers, +one to the first element in the tail queue and the other to +the last element in the tail queue. +The elements are doubly linked so that an arbitrary element can be +removed without traversing the tail queue. +New elements can be added to the tail queue after an existing element, +before an existing element, at the head of the tail queue, +or at the end of the tail queue. +A +.Fa TAILQ_HEAD +structure is declared as follows: +.Bd -literal -offset indent +TAILQ_HEAD(HEADNAME, TYPE) head; +.Ed +.Pp +where +.Li HEADNAME +is the name of the structure to be defined, and +.Li TYPE +is the type of the elements to be linked into the tail queue. +A pointer to the head of the tail queue can later be declared as: +.Bd -literal -offset indent +struct HEADNAME *headp; +.Ed +.Pp +(The names +.Li head +and +.Li headp +are user selectable.) +.Pp +The macro +.Nm TAILQ_HEAD_INITIALIZER +evaluates to an initializer for the tail queue +.Fa head . +.Pp +The macro +.Nm TAILQ_CONCAT +concatenates the tail queue headed by +.Fa head2 +onto the end of the one headed by +.Fa head1 +removing all entries from the former. +.Pp +The macro +.Nm TAILQ_EMPTY +evaluates to true if there are no items on the tail queue. +.Pp +The macro +.Nm TAILQ_ENTRY +declares a structure that connects the elements in +the tail queue. +.Pp +The macro +.Nm TAILQ_FIRST +returns the first item on the tail queue or NULL if the tail queue +is empty. +.Pp +The macro +.Nm TAILQ_FOREACH +traverses the tail queue referenced by +.Fa head +in the forward direction, assigning each element in turn to +.Fa var . +.Fa var +is set to +.Dv NULL +if the loop completes normally, or if there were no elements. +.Pp +The macro +.Nm TAILQ_FOREACH_REVERSE +traverses the tail queue referenced by +.Fa head +in the reverse direction, assigning each element in turn to +.Fa var . +.Pp +The macros +.Nm TAILQ_FOREACH_SAFE +and +.Nm TAILQ_FOREACH_REVERSE_SAFE +traverse the list referenced by +.Fa head +in the forward or reverse direction respectively, +assigning each element in turn to +.Fa var . +However, unlike their unsafe counterparts, +.Nm TAILQ_FOREACH +and +.Nm TAILQ_FOREACH_REVERSE +permit to both remove +.Fa var +as well as free it from within the loop safely without interfering with the +traversal. 
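+.Pp
+For instance, a reverse traversal that releases every element (a sketch
+reusing the declarations from the example below) could be written as:
+.Bd -literal -offset indent
+TAILQ_FOREACH_REVERSE_SAFE(np, &head, tailhead, entries, np_temp) {
+	TAILQ_REMOVE(&head, np, entries);
+	free(np);
+}
+.Ed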
+.Pp +The macro +.Nm TAILQ_INIT +initializes the tail queue referenced by +.Fa head . +.Pp +The macro +.Nm TAILQ_INSERT_HEAD +inserts the new element +.Fa elm +at the head of the tail queue. +.Pp +The macro +.Nm TAILQ_INSERT_TAIL +inserts the new element +.Fa elm +at the end of the tail queue. +.Pp +The macro +.Nm TAILQ_INSERT_AFTER +inserts the new element +.Fa elm +after the element +.Fa listelm . +.Pp +The macro +.Nm TAILQ_INSERT_BEFORE +inserts the new element +.Fa elm +before the element +.Fa listelm . +.Pp +The macro +.Nm TAILQ_LAST +returns the last item on the tail queue. +If the tail queue is empty the return value is +.Dv NULL . +.Pp +The macro +.Nm TAILQ_NEXT +returns the next item on the tail queue, or NULL if this item is the last. +.Pp +The macro +.Nm TAILQ_PREV +returns the previous item on the tail queue, or NULL if this item +is the first. +.Pp +The macro +.Nm TAILQ_REMOVE +removes the element +.Fa elm +from the tail queue. +.Pp +The macro +.Nm TAILQ_SWAP +swaps the contents of +.Fa head1 +and +.Fa head2 . +.Sh TAIL QUEUE EXAMPLE +.Bd -literal +TAILQ_HEAD(tailhead, entry) head = + TAILQ_HEAD_INITIALIZER(head); +struct tailhead *headp; /* Tail queue head. */ +struct entry { + ... + TAILQ_ENTRY(entry) entries; /* Tail queue. */ + ... +} *n1, *n2, *n3, *np; + +TAILQ_INIT(&head); /* Initialize the queue. */ + +n1 = malloc(sizeof(struct entry)); /* Insert at the head. */ +TAILQ_INSERT_HEAD(&head, n1, entries); + +n1 = malloc(sizeof(struct entry)); /* Insert at the tail. */ +TAILQ_INSERT_TAIL(&head, n1, entries); + +n2 = malloc(sizeof(struct entry)); /* Insert after. */ +TAILQ_INSERT_AFTER(&head, n1, n2, entries); + +n3 = malloc(sizeof(struct entry)); /* Insert before. */ +TAILQ_INSERT_BEFORE(n2, n3, entries); + +TAILQ_REMOVE(&head, n2, entries); /* Deletion. */ +free(n2); + /* Forward traversal. */ +TAILQ_FOREACH(np, &head, entries) + np-> ... + /* Safe forward traversal. */ +TAILQ_FOREACH_SAFE(np, &head, entries, np_temp) { + np->do_stuff(); + ... + TAILQ_REMOVE(&head, np, entries); + free(np); +} + /* Reverse traversal. */ +TAILQ_FOREACH_REVERSE(np, &head, tailhead, entries) + np-> ... + /* TailQ Deletion. */ +while (!TAILQ_EMPTY(&head)) { + n1 = TAILQ_FIRST(&head); + TAILQ_REMOVE(&head, n1, entries); + free(n1); +} + /* Faster TailQ Deletion. */ +n1 = TAILQ_FIRST(&head); +while (n1 != NULL) { + n2 = TAILQ_NEXT(n1, entries); + free(n1); + n1 = n2; +} +TAILQ_INIT(&head); +.Ed +.Sh SEE ALSO +.Xr tree 3 +.Sh HISTORY +The +.Nm queue +functions first appeared in +.Bx 4.4 . diff -Nru xen-4.6.0/extras/mini-os/include/minios-external/bsd-sys-queue.h xen-4.6.5/extras/mini-os/include/minios-external/bsd-sys-queue.h --- xen-4.6.0/extras/mini-os/include/minios-external/bsd-sys-queue.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/minios-external/bsd-sys-queue.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,637 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +#include + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
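(Editorial aside: the characterization above recommends singly-linked tail queues for FIFO queues, since they give O(1) insertion at the tail and O(1) removal at the head while arbitrary removal stays O(n). A minimal sketch of a FIFO built on the STAILQ macros defined later in this header; the element type struct job is hypothetical.)

    struct job {
            int id;
            STAILQ_ENTRY(job) link;
    };
    static STAILQ_HEAD(jobq, job) q = STAILQ_HEAD_INITIALIZER(q);

    /* Producer side: O(1) insertion at the tail. */
    static void enqueue(struct job *j)
    {
            STAILQ_INSERT_TAIL(&q, j, link);
    }

    /* Consumer side: O(1) removal from the head. */
    static struct job *dequeue(void)
    {
            struct job *j = STAILQ_FIRST(&q);

            if (j != NULL)
                    STAILQ_REMOVE_HEAD(&q, link);
            return j;
    }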
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_AFTER + - + - + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * _SWAP + + + + + * + */ +#ifdef QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) +#define QMD_SAVELINK(name, link) void **name = (void *)&(link) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define QMD_SAVELINK(name, link) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. 
+ */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.sle_next); \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_REMOVE_AFTER(curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define SLIST_REMOVE_AFTER(elm, field) do { \ + SLIST_NEXT(elm, field) = \ + SLIST_NEXT(SLIST_NEXT(elm, field), field); \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +#define SLIST_SWAP(head1, head2, type) do { \ + struct type *swap_first = SLIST_FIRST(head1); \ + SLIST_FIRST(head1) = SLIST_FIRST(head2); \ + SLIST_FIRST(head2) = swap_first; \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. 
+ */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? \ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.stqe_next); \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + STAILQ_REMOVE_AFTER(head, curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define STAILQ_REMOVE_AFTER(head, elm, field) do { \ + if ((STAILQ_NEXT(elm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_SWAP(head1, head2, type) do { \ + struct type *swap_first = STAILQ_FIRST(head1); \ + struct type **swap_last = (head1)->stqh_last; \ + STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_FIRST(head2) = swap_first; \ + (head2)->stqh_last = swap_last; \ + if (STAILQ_EMPTY(head1)) \ + (head1)->stqh_last = &STAILQ_FIRST(head1); \ + if (STAILQ_EMPTY(head2)) \ + (head2)->stqh_last = &STAILQ_FIRST(head2); \ +} while (0) + + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + QMD_LIST_CHECK_NEXT(listelm, field); \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + QMD_LIST_CHECK_HEAD((head), field); \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.le_next); \ + QMD_SAVELINK(oldprev, (elm)->field.le_prev); \ + QMD_LIST_CHECK_NEXT(elm, field); \ + QMD_LIST_CHECK_PREV(elm, field); \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ +} while (0) + +#define LIST_SWAP(head1, head2, type, field) do { \ + struct type *swap_tmp = LIST_FIRST((head1)); \ + LIST_FIRST((head1)) = LIST_FIRST((head2)); \ + LIST_FIRST((head2)) = swap_tmp; \ + if ((swap_tmp = LIST_FIRST((head1))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head1)); \ + if ((swap_tmp = LIST_FIRST((head2))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head2)); \ +} while (0) + +/* + * Tail queue declarations. 
+ */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + 
(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + QMD_TAILQ_CHECK_TAIL(head, field); \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_SWAP(head1, head2, type, field) do { \ + struct type *swap_first = (head1)->tqh_first; \ + struct type **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) + +#endif /* !_SYS_QUEUE_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery xen-4.6.5/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery --- xen-4.6.0/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/minios-external/bsd-sys-queue-h-seddery 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,72 @@ +#!/usr/bin/perl -p +# +# This script is part of the Xen build system. It has a very +# permissive licence to avoid complicating the licence of the +# generated header file and to allow this seddery to be reused by +# other projects. 
+# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this individual file (the "Software"), to deal +# in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, +# sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the +# following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Copyright (C) 2011 Citrix Ltd + +our $namespace, $ucnamespace; + +BEGIN { + die unless @ARGV; + $namespace = pop @ARGV; + $namespace =~ s/^--prefix=// or die; + $ucnamespace = uc $namespace; + + print <tools/libxl/external/bsd-COPYRIGHT + +Exceptions: + +README + + This file + +bsd-sys-queue-h-seddery + + Script to transform the above into a new namespace. diff -Nru xen-4.6.0/extras/mini-os/include/mm.h xen-4.6.5/extras/mini-os/include/mm.h --- xen-4.6.0/extras/mini-os/include/mm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/mm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,82 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _MM_H_ +#define _MM_H_ + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#elif defined(__arm__) || defined(__aarch64__) +#include +#else +#error "Unsupported architecture" +#endif +#include + +#include +#include + +#define STACK_SIZE_PAGE_ORDER __STACK_SIZE_PAGE_ORDER +#define STACK_SIZE __STACK_SIZE + + +void init_mm(void); +unsigned long alloc_pages(int order); +#define alloc_page() alloc_pages(0) +void free_pages(void *pointer, int order); +#define free_page(p) free_pages(p, 0) + +static __inline__ int get_order(unsigned long size) +{ + int order; + size = (size-1) >> PAGE_SHIFT; + for ( order = 0; size; order++ ) + size >>= 1; + return order; +} + +void arch_init_demand_mapping_area(unsigned long max_pfn); +void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p); +void arch_init_p2m(unsigned long max_pfn_p); + +unsigned long allocate_ondemand(unsigned long n, unsigned long alignment); +/* map f[i*stride]+i*increment for i in 0..n-1, aligned on alignment pages */ +void *map_frames_ex(const unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, unsigned long alignment, domid_t id, + int *err, unsigned long prot); +void do_map_frames(unsigned long addr, + const unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, domid_t id, int *err, unsigned long prot); +int unmap_frames(unsigned long va, unsigned long num_frames); +unsigned long alloc_contig_pages(int order, unsigned int addr_bits); +#ifdef HAVE_LIBC +extern unsigned long heap, brk, heap_mapped, heap_end; +#endif + +int free_physical_pages(xen_pfn_t *mfns, int n); +void fini_mm(void); + +#endif /* _MM_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/netfront.h xen-4.6.5/extras/mini-os/include/netfront.h --- xen-4.6.0/extras/mini-os/include/netfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/netfront.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,24 @@ +#include +#ifdef HAVE_LWIP +#include +#endif +struct netfront_dev; +struct netfront_dev *init_netfront(char *nodename, void (*netif_rx)(unsigned char *data, int len), unsigned char rawmac[6], char **ip); +void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len); +void shutdown_netfront(struct netfront_dev *dev); +#ifdef HAVE_LIBC +int netfront_tap_open(char *nodename); +ssize_t netfront_receive(struct netfront_dev *dev, unsigned char *data, size_t len); +#endif + +extern struct wait_queue_head netfront_queue; + +#ifdef HAVE_LWIP +/* Call this to bring up the netfront interface and the lwIP stack. + * N.B. _must_ be called from a thread; it's not safe to call this from + * app_main(). 
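(Editorial aside: a minimal sketch of how the netfront interface declared above might be used from a Mini-OS thread. The names my_rx and net_thread are hypothetical, passing a NULL nodename to select the default vif node is an assumption, and error handling is elided.)

    /* Receive callback: invoked for every frame when not using lwIP. */
    static void my_rx(unsigned char *data, int len)
    {
            /* inspect or copy the frame here */
    }

    static void net_thread(void *unused)
    {
            unsigned char mac[6];
            char *ip = NULL;
            struct netfront_dev *dev;

            /* NULL nodename: assumed to pick the default backend node. */
            dev = init_netfront(NULL, my_rx, mac, &ip);
            if (dev == NULL)
                    return;
            /* transmit a raw frame: netfront_xmit(dev, frame, frame_len); */
            shutdown_netfront(dev);
    }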
*/ +void start_networking(void); +void stop_networking(void); + +void networking_set_addr(struct ip_addr *ipaddr, struct ip_addr *netmask, struct ip_addr *gw); +#endif diff -Nru xen-4.6.0/extras/mini-os/include/pcifront.h xen-4.6.5/extras/mini-os/include/pcifront.h --- xen-4.6.0/extras/mini-os/include/pcifront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/pcifront.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,29 @@ +#include +#include +struct pcifront_dev; +void pcifront_watches(void *opaque); +struct pcifront_dev *init_pcifront(char *nodename); +void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op); +void pcifront_scan(struct pcifront_dev *dev, void (*fun)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun)); +int pcifront_conf_read(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int *val); +int pcifront_conf_write(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int val); +int pcifront_enable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +int pcifront_disable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +int pcifront_enable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + struct xen_msix_entry *entries, int n); +int pcifront_disable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun); +void shutdown_pcifront(struct pcifront_dev *dev); diff -Nru xen-4.6.0/extras/mini-os/include/posix/arpa/inet.h xen-4.6.5/extras/mini-os/include/posix/arpa/inet.h --- xen-4.6.0/extras/mini-os/include/posix/arpa/inet.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/arpa/inet.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_ARPA_INET_H_ +#define _POSIX_ARPA_INET_H_ + +#include + +#endif /* _POSIX_ARPA_INET_H_ */ + diff -Nru xen-4.6.0/extras/mini-os/include/posix/dirent.h xen-4.6.5/extras/mini-os/include/posix/dirent.h --- xen-4.6.0/extras/mini-os/include/posix/dirent.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/dirent.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,24 @@ +#ifndef _POSIX_DIRENT_H +#define _POSIX_DIRENT_H + +#include + +struct dirent { + char *d_name; +}; + +typedef struct { + struct dirent dirent; + char *name; + int32_t offset; + char **entries; + int32_t curentry; + int32_t nbentries; + int has_more; +} DIR; + +DIR *opendir(const char *name); +struct dirent *readdir(DIR *dir); +int closedir(DIR *dir); + +#endif /* _POSIX_DIRENT_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/err.h xen-4.6.5/extras/mini-os/include/posix/err.h --- xen-4.6.0/extras/mini-os/include/posix/err.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/err.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,15 @@ +#ifndef _POSIX_ERR_H +#define _POSIX_ERR_H + +#include + +void err(int eval, const char *fmt, ...); +void errx(int eval, const char *fmt, ...); +void warn(const char *fmt, ...); +void warnx(const char *fmt, ...); +void verr(int eval, const char *fmt, va_list args); +void verrx(int eval, const char *fmt, va_list args); +void vwarn(const char *fmt, va_list args); +void vwarnx(const char *fmt, 
va_list args); + +#endif /* _POSIX_ERR_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/fcntl.h xen-4.6.5/extras/mini-os/include/posix/fcntl.h --- xen-4.6.0/extras/mini-os/include/posix/fcntl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/fcntl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef _POSIX_FCNTL_H +#define _POSIX_FCNTL_H + +#include_next + +#define F_ULOCK 0 +#define F_LOCK 1 +#define F_TLOCK 2 +#define F_TEST 3 + +#endif /* _POSIX_FCNTL_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/limits.h xen-4.6.5/extras/mini-os/include/posix/limits.h --- xen-4.6.0/extras/mini-os/include/posix/limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/limits.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,48 @@ +#ifndef _POSIX_LIMITS_H +#define _POSIX_LIMITS_H + +#include + +#define CHAR_BIT 8 + +#define SCHAR_MAX 0x7f +#define SCHAR_MIN (-SCHAR_MAX-1) +#define UCHAR_MAX 0xff + +#ifdef __CHAR_UNSIGNED__ +# define CHAR_MIN 0 +# define CHAR_MAX UCHAR_MAX +#else +# define CHAR_MIN SCHAR_MIN +# define CHAR_MAX SCHAR_MAX +#endif + +#define INT_MAX 0x7fffffff +#define INT_MIN (-INT_MAX-1) +#define UINT_MAX 0xffffffff + +#define SHRT_MIN (-0x8000) +#define SHRT_MAX 0x7fff +#define USHRT_MAX 0xffff + +#if defined(__x86_64__) +# define LONG_MAX 0x7fffffffffffffffL +# define ULONG_MAX 0xffffffffffffffffUL +#else +# define LONG_MAX 0x7fffffffL +# define ULONG_MAX 0xffffffffUL +#endif +#define LONG_MIN (-LONG_MAX-1L) + +#define LLONG_MAX 0x7fffffffffffffffLL +#define LLONG_MIN (-LLONG_MAX-1LL) +#define ULLONG_MAX 0xffffffffffffffffULL + +#define LONG_LONG_MIN LLONG_MIN +#define LONG_LONG_MAX LLONG_MAX +#define ULONG_LONG_MAX ULLONG_MAX + +#define PATH_MAX __PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE + +#endif /* _POSIX_LIMITS_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/net/if.h xen-4.6.5/extras/mini-os/include/posix/net/if.h --- xen-4.6.0/extras/mini-os/include/posix/net/if.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/net/if.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,85 @@ +/* + * This code is mostly taken from NetBSD net/if.h + * Changes: Stefano Stabellini + * + ****************************************************************************** + * + * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by William Studenmund and Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _NET_IF_H_ +#define _NET_IF_H_ + +/* + * Length of interface external name, including terminating '\0'. + * Note: this is the same size as a generic device's external name. + */ +#define IF_NAMESIZE 16 + +struct if_nameindex { + unsigned int if_index; /* 1, 2, ... */ + char *if_name; /* null terminated name: "le0", ... 
*/ +}; + +unsigned int if_nametoindex(const char *); +char * if_indextoname(unsigned int, char *); +struct if_nameindex * if_nameindex(void); +void if_freenameindex(struct if_nameindex *); + +#endif /* !_NET_IF_H_ */ + diff -Nru xen-4.6.0/extras/mini-os/include/posix/netdb.h xen-4.6.5/extras/mini-os/include/posix/netdb.h --- xen-4.6.0/extras/mini-os/include/posix/netdb.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/netdb.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,9 @@ +#ifndef _POSIX_NETDB_H_ +#define _POSIX_NETDB_H_ + +struct hostent { + char *h_addr; +}; +#define gethostbyname(buf) NULL + +#endif /* _POSIX_NETDB_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/netinet/in.h xen-4.6.5/extras/mini-os/include/posix/netinet/in.h --- xen-4.6.0/extras/mini-os/include/posix/netinet/in.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/netinet/in.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SYS_IN_H_ +#define _POSIX_SYS_IN_H_ + +#include +#include + +#endif /* _POSIX_SYS_IN_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/netinet/tcp.h xen-4.6.5/extras/mini-os/include/posix/netinet/tcp.h --- xen-4.6.0/extras/mini-os/include/posix/netinet/tcp.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/netinet/tcp.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,6 @@ +#ifndef _POSIX_SYS_TCP_H_ +#define _POSIX_SYS_TCP_H_ + +#include + +#endif /* _POSIX_SYS_TCP_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/poll.h xen-4.6.5/extras/mini-os/include/posix/poll.h --- xen-4.6.0/extras/mini-os/include/posix/poll.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/poll.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1 @@ +#include diff -Nru xen-4.6.0/extras/mini-os/include/posix/pthread.h xen-4.6.5/extras/mini-os/include/posix/pthread.h --- xen-4.6.0/extras/mini-os/include/posix/pthread.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/pthread.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,64 @@ +#ifndef _POSIX_PTHREAD_H +#define _POSIX_PTHREAD_H + +#include + +/* Let's be single-threaded for now. 
*/ + +typedef struct { + void *ptr; +} *pthread_key_t; +static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) +{ + *key = malloc(sizeof(**key)); + (*key)->ptr = NULL; + return 0; +} +static inline int pthread_setspecific(pthread_key_t key, const void *pointer) +{ + key->ptr = (void*) pointer; + return 0; +} +static inline void *pthread_getspecific(pthread_key_t key) +{ + return key->ptr; +} +static inline int pthread_key_delete(pthread_key_t key) +{ + free(key); + return 0; +} + + + +typedef struct {} pthread_mutexattr_t; +static inline int pthread_mutexattr_init(pthread_mutexattr_t *mattr) { return 0; } +#define PTHREAD_MUTEX_NORMAL 0 +#define PTHREAD_MUTEX_RECURSIVE 1 +static inline int pthread_mutexattr_settype(pthread_mutexattr_t *mattr, int kind) { return 0; } +static inline int pthread_mutexattr_destroy(pthread_mutexattr_t *mattr) { return 0; } +typedef struct {} pthread_mutex_t; +#define PTHREAD_MUTEX_INITIALIZER {} +static inline int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *mattr) { return 0; } +static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; } +static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; } + + + +typedef struct { + int done; +} pthread_once_t; +#define PTHREAD_ONCE_INIT { 0 } + +static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) +{ + if (!once_control->done) { + once_control->done = 1; + init_routine(); + } + return 0; +} + +#define __thread + +#endif /* _POSIX_PTHREAD_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/signal.h xen-4.6.5/extras/mini-os/include/posix/signal.h --- xen-4.6.0/extras/mini-os/include/posix/signal.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/signal.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,10 @@ +#ifndef _POSIX_SIGNAL_H +#define _POSIX_SIGNAL_H + +#include_next + +int sigaction(int signum, const struct sigaction * __restrict, + struct sigaction * __restrict); + +#endif + diff -Nru xen-4.6.0/extras/mini-os/include/posix/stdlib.h xen-4.6.5/extras/mini-os/include/posix/stdlib.h --- xen-4.6.0/extras/mini-os/include/posix/stdlib.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/stdlib.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,8 @@ +#ifndef _POSIX_STDLIB_H +#define _POSIX_STDLIB_H + +#include_next + +#define realpath(p,r) strcpy(r,p) + +#endif /* _POSIX_STDLIB_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/strings.h xen-4.6.5/extras/mini-os/include/posix/strings.h --- xen-4.6.0/extras/mini-os/include/posix/strings.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/strings.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,12 @@ +#ifndef _POSIX_STRINGS_H +#define _POSIX_STRINGS_H + +#include + +#define bzero(ptr, size) (memset((ptr), '\0', (size)), (void) 0) + +int ffs (int i); +int ffsl (long int li); +int ffsll (long long int lli); + +#endif /* _POSIX_STRINGS_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/ioctl.h xen-4.6.5/extras/mini-os/include/posix/sys/ioctl.h --- xen-4.6.0/extras/mini-os/include/posix/sys/ioctl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/ioctl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef _POSIX_SYS_IOCTL_H +#define _POSIX_SYS_IOCTL_H + +int ioctl(int fd, int request, ...); + +#define _IOC_NONE 0 +#define _IOC_WRITE 1 +#define _IOC_READ 2 + +#define _IOC(rw, class, n, size) \ + (((rw ) << 30) | \ + 
((class) << 22) | \ + ((n ) << 14) | \ + ((size ) << 0)) + +#endif /* _POSIX_SYS_IOCTL_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/mman.h xen-4.6.5/extras/mini-os/include/posix/sys/mman.h --- xen-4.6.0/extras/mini-os/include/posix/sys/mman.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/mman.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,22 @@ +#ifndef _POSIX_SYS_MMAN_H +#define _POSIX_SYS_MMAN_H + +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define PROT_EXEC 0x4 + +#define MAP_SHARED 0x01 +#define MAP_PRIVATE 0x02 +#define MAP_ANON 0x20 + +/* Pages are always resident anyway */ +#define MAP_LOCKED 0x0 + +#define MAP_FAILED ((void*)0) + +void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) asm("mmap64"); +int munmap(void *start, size_t length); +static inline int mlock(const void *addr, size_t len) { return 0; } +static inline int munlock(const void *addr, size_t len) { return 0; } + +#endif /* _POSIX_SYS_MMAN_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/poll.h xen-4.6.5/extras/mini-os/include/posix/sys/poll.h --- xen-4.6.0/extras/mini-os/include/posix/sys/poll.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/poll.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,79 @@ +/* + * This code is mostly taken from FreeBSD sys/sys/poll.h + * Changes: Stefano Stabellini + * + **************************************************************************** + * Copyright (c) 1997 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _POSIX_SYS_POLL_H_ +#define _POSIX_SYS_POLL_H_ + +/* + * This file is intended to be compatible with the traditional poll.h. + */ + +typedef unsigned int nfds_t; + +/* + * This structure is passed as an array to poll(2). + */ +struct pollfd { + int fd; /* which file descriptor to poll */ + short events; /* events we are interested in */ + short revents; /* events found on return */ +}; + +/* + * Requestable events. If poll(2) finds any of these set, they are + * copied to revents on return.
+ * XXX Note that FreeBSD doesn't make much distinction between POLLPRI + * and POLLRDBAND since none of the file types have distinct priority + * bands - and only some have an urgent "mode". + * XXX Note POLLIN isn't really supported in true SYSV terms. Under SYSV + * POLLIN includes all of normal, band and urgent data. Most poll handlers + * on FreeBSD only treat it as "normal" data. + */ +#define POLLIN 0x0001 /* any readable data available */ +#define POLLPRI 0x0002 /* OOB/Urgent readable data */ +#define POLLOUT 0x0004 /* file descriptor is writeable */ +#define POLLRDNORM 0x0040 /* non-OOB/URG data available */ +#define POLLWRNORM POLLOUT /* no write type differentiation */ +#define POLLRDBAND 0x0080 /* OOB/Urgent readable data */ +#define POLLWRBAND 0x0100 /* OOB/Urgent data can be written */ + +/* + * These events are set if they occur regardless of whether they were + * requested. + */ +#define POLLERR 0x0008 /* some poll error occurred */ +#define POLLHUP 0x0010 /* file descriptor was "hung up" */ +#define POLLNVAL 0x0020 /* requested events "invalid" */ + +int poll(struct pollfd _pfd[], nfds_t _nfds, int _timeout); + +#endif /* _POSIX_SYS_POLL_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/select.h xen-4.6.5/extras/mini-os/include/posix/sys/select.h --- xen-4.6.0/extras/mini-os/include/posix/sys/select.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/select.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SELECT_H +#define _POSIX_SELECT_H + +#include +int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); + +#endif /* _POSIX_SELECT_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/socket.h xen-4.6.5/extras/mini-os/include/posix/sys/socket.h --- xen-4.6.0/extras/mini-os/include/posix/sys/socket.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/socket.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,31 @@ +#ifndef _POSIX_SYS_SOCKET_H_ +#define _POSIX_SYS_SOCKET_H_ + +#include +#include + +int accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int bind(int s, struct sockaddr *name, socklen_t namelen); +int shutdown(int s, int how); +int getpeername (int s, struct sockaddr *name, socklen_t *namelen); +int getsockname (int s, struct sockaddr *name, socklen_t *namelen); +int getsockopt (int s, int level, int optname, void *optval, socklen_t *optlen); +int setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen); +int close(int s); +int connect(int s, struct sockaddr *name, socklen_t namelen); +int listen(int s, int backlog); +int recv(int s, void *mem, int len, unsigned int flags); +//int read(int s, void *mem, int len); +int recvfrom(int s, void *mem, int len, unsigned int flags, + struct sockaddr *from, socklen_t *fromlen); +int send(int s, void *dataptr, int size, unsigned int flags); +int sendto(int s, void *dataptr, int size, unsigned int flags, + struct sockaddr *to, socklen_t tolen); +int socket(int domain, int type, int protocol); +//int write(int s, void *dataptr, int size); +int select(int maxfdp1, fd_set *readset, fd_set *writeset, fd_set *exceptset, + struct timeval *timeout); +//int ioctl(int s, long cmd, void *argp); +int getsockname(int s, struct sockaddr *name, socklen_t *namelen); + +#endif /* _POSIX_SYS_SOCKET_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/sys/stat.h xen-4.6.5/extras/mini-os/include/posix/sys/stat.h --- xen-4.6.0/extras/mini-os/include/posix/sys/stat.h
1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/sys/stat.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _POSIX_SYS_STAT_H +#define _POSIX_SYS_STAT_H + +#include_next +int fstat(int fd, struct stat *buf) asm("fstat64"); + +#endif /* _POSIX_SYS_STAT_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/syslog.h xen-4.6.5/extras/mini-os/include/posix/syslog.h --- xen-4.6.0/extras/mini-os/include/posix/syslog.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/syslog.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,37 @@ +#ifndef _POSIX_SYSLOG_H +#define _POSIX_SYSLOG_H + +#include + +#define LOG_PID 0 +#define LOG_CONS 0 +#define LOG_NDELAY 0 +#define LOG_ODELAY 0 +#define LOG_NOWAIT 0 + +#define LOG_KERN 0 +#define LOG_USER 0 +#define LOG_MAIL 0 +#define LOG_NEWS 0 +#define LOG_UUCP 0 +#define LOG_DAEMON 0 +#define LOG_AUTH 0 +#define LOG_CRON 0 +#define LOG_LPR 0 + +/* TODO: support */ +#define LOG_EMERG 0 +#define LOG_ALERT 1 +#define LOG_CRIT 2 +#define LOG_ERR 3 +#define LOG_WARNING 4 +#define LOG_NOTICE 5 +#define LOG_INFO 6 +#define LOG_DEBUG 7 + +void openlog(const char *ident, int option, int facility); +void syslog(int priority, const char *format, ...); +void closelog(void); +void vsyslog(int priority, const char *format, va_list ap); + +#endif /* _POSIX_SYSLOG_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/termios.h xen-4.6.5/extras/mini-os/include/posix/termios.h --- xen-4.6.0/extras/mini-os/include/posix/termios.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/termios.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,87 @@ +#ifndef _POSIX_TERMIOS_H +#define _POSIX_TERMIOS_H + +#define NCC 32 + +struct termios { + unsigned long c_iflag; + unsigned long c_oflag; + unsigned long c_lflag; + unsigned long c_cflag; + unsigned char c_cc[NCC]; +}; + +/* modem lines */ +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RI 0x080 +#define TIOCM_DSR 0x100 + +/* c_iflag */ +#define IGNBRK 0x00000001 +#define BRKINT 0x00000002 +#define IGNPAR 0x00000004 +#define PARMRK 0x00000008 +#define INPCK 0x00000010 +#define ISTRIP 0x00000020 +#define INLCR 0x00000040 +#define IGNCR 0x00000080 +#define ICRNL 0x00000100 +#define IUCLC 0x00000200 +#define IXON 0x00000400 +#define IXANY 0x00000800 +#define IXOFF 0x00001000 +#define IMAXBEL 0x00002000 +#define IUTF8 0x00004000 + +/* c_oflag */ +#define OPOST 0x00000001 +#define OLCUC 0x00000002 +#define ONLCR 0x00000004 +#define OCRNL 0x00000008 +#define ONOCR 0x00000010 +#define ONLRET 0x00000020 +#define OFILL 0x00000040 +#define OFDEL 0x00000080 + +/* c_lflag */ +#define ISIG 0x00000001 +#define ICANON 0x00000002 +#define XCASE 0x00000004 +#define ECHO 0x00000008 +#define ECHOE 0x00000010 +#define ECHOK 0x00000020 +#define ECHONL 0x00000040 +#define NOFLSH 0x00000080 +#define TOSTOP 0x00000100 +#define ECHOCTL 0x00000200 +#define ECHOPRT 0x00000400 +#define ECHOKE 0x00000800 +#define FLUSHO 0x00002000 +#define PENDIN 0x00004000 +#define IEXTEN 0x00008000 + +/* c_cflag */ +#define CSIZE 0x00000030 +#define CS8 0x00000030 +#define CSTOPB 0x00000040 +#define CREAD 0x00000080 +#define PARENB 0x00000100 +#define PARODD 0x00000200 +#define HUPCL 0x00000400 +#define CLOCAL 0x00000800 + +/* c_cc */ +#define VTIME 5 +#define VMIN 6 + +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +int tcsetattr(int fildes, int action, const struct termios *tios); +int tcgetattr(int fildes, 
struct termios *tios); + +#endif /* _POSIX_TERMIOS_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/time.h xen-4.6.5/extras/mini-os/include/posix/time.h --- xen-4.6.0/extras/mini-os/include/posix/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/time.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef _POSIX_TIME_H +#define _POSIX_TIME_H + +#include +#define CLOCK_MONOTONIC 2 +#include_next + +int nanosleep(const struct timespec *req, struct timespec *rem); +int clock_gettime(clockid_t clock_id, struct timespec *tp); + +#endif /* _POSIX_TIME_H */ diff -Nru xen-4.6.0/extras/mini-os/include/posix/unistd.h xen-4.6.5/extras/mini-os/include/posix/unistd.h --- xen-4.6.0/extras/mini-os/include/posix/unistd.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/posix/unistd.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,16 @@ +#ifndef _POSIX_UNISTD_H +#define _POSIX_UNISTD_H + +#include_next + +uid_t getuid(void); +uid_t geteuid(void); +gid_t getgid(void); +gid_t getegid(void); +int gethostname(char *name, size_t namelen); +size_t getpagesize(void); +int ftruncate(int fd, off_t length); +int lockf(int fd, int cmd, off_t len); +int nice(int inc); + +#endif /* _POSIX_UNISTD_H */ diff -Nru xen-4.6.0/extras/mini-os/include/sched.h xen-4.6.5/extras/mini-os/include/sched.h --- xen-4.6.0/extras/mini-os/include/sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/sched.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,55 @@ +#ifndef __SCHED_H__ +#define __SCHED_H__ + +#include +#include +#include +#ifdef HAVE_LIBC +#include +#endif + +struct thread +{ + char *name; + char *stack; + /* keep in that order */ + unsigned long sp; /* Stack pointer */ + unsigned long ip; /* Instruction pointer */ + MINIOS_TAILQ_ENTRY(struct thread) thread_list; + uint32_t flags; + s_time_t wakeup_time; +#ifdef HAVE_LIBC + struct _reent reent; +#endif +}; + +extern struct thread *idle_thread; +void idle_thread_fn(void *unused); + +#define RUNNABLE_FLAG 0x00000001 + +#define is_runnable(_thread) (_thread->flags & RUNNABLE_FLAG) +#define set_runnable(_thread) (_thread->flags |= RUNNABLE_FLAG) +#define clear_runnable(_thread) (_thread->flags &= ~RUNNABLE_FLAG) + +#define switch_threads(prev, next) arch_switch_threads(prev, next) + + /* Architecture specific setup of thread creation. */ +struct thread* arch_create_thread(char *name, void (*function)(void *), + void *data); + +void init_sched(void); +void run_idle_thread(void); +struct thread* create_thread(char *name, void (*function)(void *), void *data); +void exit_thread(void) __attribute__((noreturn)); +void schedule(void); + +#ifdef __INSIDE_MINIOS__ +#define current get_current() +#endif + +void wake(struct thread *thread); +void block(struct thread *thread); +void msleep(uint32_t millisecs); + +#endif /* __SCHED_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/semaphore.h xen-4.6.5/extras/mini-os/include/semaphore.h --- xen-4.6.0/extras/mini-os/include/semaphore.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/semaphore.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,110 @@ +#ifndef _SEMAPHORE_H_ +#define _SEMAPHORE_H_ + +#include +#include + +/* + * Implementation of semaphore in Mini-os is simple, because + * there are no preemptive threads, the atomicity is guaranteed. 
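(Editorial aside: a minimal sketch of the intended use of the mutex-style helpers defined just below; counter_lock, counter and bump are hypothetical names.)

    static DECLARE_MUTEX(counter_lock);    /* semaphore initialised to 1 */
    static int counter;

    static void bump(void)
    {
            down(&counter_lock);    /* sleeps until the count is positive */
            counter++;
            up(&counter_lock);      /* increments the count and wakes waiters */
    }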
+ */ + +struct semaphore +{ + int count; + struct wait_queue_head wait; +}; + +/* + * the semaphore definition + */ +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + int debug; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = n, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) + +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static inline void init_SEMAPHORE(struct semaphore *sem, int count) +{ + sem->count = count; + init_waitqueue_head(&sem->wait); +} + +#define init_MUTEX(sem) init_SEMAPHORE(sem, 1) + +static inline int trydown(struct semaphore *sem) +{ + unsigned long flags; + int ret = 0; + local_irq_save(flags); + if (sem->count > 0) { + ret = 1; + sem->count--; + } + local_irq_restore(flags); + return ret; +} + +static void inline down(struct semaphore *sem) +{ + unsigned long flags; + while (1) { + wait_event(sem->wait, sem->count > 0); + local_irq_save(flags); + if (sem->count > 0) + break; + local_irq_restore(flags); + } + sem->count--; + local_irq_restore(flags); +} + +static void inline up(struct semaphore *sem) +{ + unsigned long flags; + local_irq_save(flags); + sem->count++; + wake_up(&sem->wait); + local_irq_restore(flags); +} + +/* FIXME! The read/write semaphores are unimplemented! */ +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = 1; +} + +static inline void down_read(struct rw_semaphore *sem) +{ +} + + +static inline void up_read(struct rw_semaphore *sem) +{ +} + +static inline void up_write(struct rw_semaphore *sem) +{ +} + +static inline void down_write(struct rw_semaphore *sem) +{ +} + +#endif /* _SEMAPHORE_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/spinlock.h xen-4.6.5/extras/mini-os/include/spinlock.h --- xen-4.6.0/extras/mini-os/include/spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/spinlock.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,55 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int slock; +} spinlock_t; + + +#include + + +#define SPINLOCK_MAGIC 0xdead4ead + +#define SPIN_LOCK_UNLOCKED ARCH_SPIN_LOCK_UNLOCKED + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define spin_is_locked(x) arch_spin_is_locked(x) + +#define spin_unlock_wait(x) arch_spin_unlock_wait(x) + + +#define _spin_trylock(lock) ({_raw_spin_trylock(lock) ?
\ + 1 : ({ 0;});}) + +#define _spin_lock(lock) \ +do { \ + _raw_spin_lock(lock); \ +} while(0) + +#define _spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ +} while (0) + + +#define spin_lock(lock) _spin_lock(lock) +#define spin_unlock(lock) _spin_unlock(lock) + +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/sys/lock.h xen-4.6.5/extras/mini-os/include/sys/lock.h --- xen-4.6.0/extras/mini-os/include/sys/lock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/sys/lock.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,52 @@ +#ifndef _MINIOS_SYS_LOCK_H_ +#define _MINIOS_SYS_LOCK_H_ + +#ifdef HAVE_LIBC + +/* Due to an inclusion loop, we cannot include sched.h, so we have to hide things */ + +#include + + +typedef struct { + int busy; + struct wait_queue_head wait; +} _LOCK_T; + +#define __LOCK_INIT(class,lock) \ + class _LOCK_T lock = { .wait = __WAIT_QUEUE_HEAD_INITIALIZER(lock.wait) } +int ___lock_init(_LOCK_T *lock); +int ___lock_acquire(_LOCK_T *lock); +int ___lock_try_acquire(_LOCK_T *lock); +int ___lock_release(_LOCK_T *lock); +int ___lock_close(_LOCK_T *lock); +#define __lock_init(__lock) ___lock_init(&__lock) +#define __lock_acquire(__lock) ___lock_acquire(&__lock) +#define __lock_release(__lock) ___lock_release(&__lock) +#define __lock_try_acquire(__lock) ___lock_try_acquire(&__lock) +#define __lock_close(__lock) 0 + + +typedef struct { + struct thread *owner; + int count; + struct wait_queue_head wait; +} _LOCK_RECURSIVE_T; + +#define __LOCK_INIT_RECURSIVE(class, lock) \ + class _LOCK_RECURSIVE_T lock = { .wait = __WAIT_QUEUE_HEAD_INITIALIZER((lock).wait) } + +int ___lock_init_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_acquire_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_try_acquire_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_release_recursive(_LOCK_RECURSIVE_T *lock); +int ___lock_close_recursive(_LOCK_RECURSIVE_T *lock); +#define __lock_init_recursive(__lock) ___lock_init_recursive(&__lock) +#define __lock_acquire_recursive(__lock) ___lock_acquire_recursive(&__lock) +#define __lock_release_recursive(__lock) ___lock_release_recursive(&__lock) +#define __lock_try_acquire_recursive(__lock) ___lock_try_acquire_recursive(&__lock) +#define __lock_close_recursive(__lock) 0 + +#endif + +#endif /* _MINIOS_SYS_LOCK_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/sys/time.h xen-4.6.5/extras/mini-os/include/sys/time.h --- xen-4.6.0/extras/mini-os/include/sys/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/sys/time.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,47 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Robert Kaiser (kaiser@informatik.fh-wiesbaden.de) + * + * Date: Jul 2003, changes: Jun 2005, Sep 2006 + * + * Environment: Xen Minimal OS + * Description: Time and timer functions + * + **************************************************************************** + */ + +#ifndef _MINIOS_SYS_TIME_H_ +#define _MINIOS_SYS_TIME_H_ + +#ifdef HAVE_LIBC +#include_next + +#else +struct timespec { + time_t tv_sec; + long tv_nsec; +}; + +struct timezone { +}; +
+struct timeval { + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ +}; + +int gettimeofday(struct timeval *tv, void *tz); + +#endif +#ifdef HAVE_LIBC +#include +#endif + +#endif /* _MINIOS_SYS_TIME_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/time.h xen-4.6.5/extras/mini-os/include/time.h --- xen-4.6.0/extras/mini-os/include/time.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/time.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,63 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Robert Kaiser (kaiser@informatik.fh-wiesbaden.de) + * + * Date: Jul 2003, changes: Jun 2005, Sep 2006 + * + * Environment: Xen Minimal OS + * Description: Time and timer functions + * + **************************************************************************** + */ + +#ifndef _MINIOS_TIME_H_ +#define _MINIOS_TIME_H_ +#include + +/* + * System Time + * 64 bit value containing the nanoseconds elapsed since boot time. + * This value is adjusted by frequency drift. + * NOW() returns the current time. + * The other macros are for convenience to approximate short intervals + * of real time into system time + */ +typedef int64_t s_time_t; +#define NOW() ((s_time_t)monotonic_clock()) +#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) +#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) +#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) +#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL ) +#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) +#define Time_Max ((s_time_t) 0x7fffffffffffffffLL) +#define FOREVER Time_Max +#define NSEC_TO_USEC(_nsec) ((_nsec) / 1000UL) +#define NSEC_TO_MSEC(_nsec) ((_nsec) / 1000000ULL) +#define NSEC_TO_SEC(_nsec) ((_nsec) / 1000000000ULL) + +/* wall clock time */ +typedef long time_t; +typedef long suseconds_t; + +#include + +#ifdef HAVE_LIBC +#include_next +#endif + +/* prototypes */ +void init_time(void); +void fini_time(void); +s_time_t get_s_time(void); +s_time_t get_v_time(void); +uint64_t monotonic_clock(void); +void block_domain(s_time_t until); + +#endif /* _MINIOS_TIME_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/tpmback.h xen-4.6.5/extras/mini-os/include/tpmback.h --- xen-4.6.0/extras/mini-os/include/tpmback.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/tpmback.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. 
+ * + * This code has been derived from drivers/xen/tpmback/tpmback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netback/netback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This code has also been derived from drivers/xen/tpmback/xenbus.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2005 IBM Corporation + * Copyright (C) 2005 Rusty Russell + * + * This code has also been derived from drivers/xen/tpmback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself also derived from drivers/xen/netback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ + +#include +#include +#include +#include +#ifndef TPMBACK_H +#define TPMBACK_H + +struct tpmcmd { + domid_t domid; /* Domid of the frontend */ + uint8_t locality; /* Locality requested by the frontend */ + unsigned int handle; /* Handle of the frontend */ + void *opaque; /* Opaque pointer taken from the tpmback instance */ + + uint8_t* req; /* tpm command bits, allocated by driver, DON'T FREE IT */ + unsigned int req_len; /* Size of the command in buf - set by tpmback driver */ + unsigned int resp_len; /* Size of the outgoing command, + you set this before passing the cmd object to tpmback_resp */ + uint8_t* resp; /* Buffer for response - YOU MUST ALLOCATE IT, YOU MUST ALSO FREE IT */ +}; +typedef struct tpmcmd tpmcmd_t; + +/* Initialize the tpm backend driver */ +void init_tpmback(void (*open_cb)(domid_t, unsigned int), void (*close_cb)(domid_t, unsigned int)); + +/* Shutdown tpm backend driver */ +void shutdown_tpmback(void); + +/* Blocks until a tpm command is sent from any front end. + * Returns a pointer to the tpm command to handle. + * Do not try to free this pointer or the req buffer + * This function will return NULL if the tpm backend driver + * is shutdown or any other error occurs */ +tpmcmd_t* tpmback_req_any(void); + +/* Blocks until a tpm command from the frontend at domid/handle + * is sent. + * Returns NULL if domid/handle is not connected, tpmback is + * shutdown or shutting down, or if there is an error + */ +tpmcmd_t* tpmback_req(domid_t domid, unsigned int handle); + +/* Send the response to the tpm command back to the frontend + * This function will free the tpmcmd object, but you must free the resp + * buffer yourself */ +void tpmback_resp(tpmcmd_t* tpmcmd); + +/* Waits for the first frontend to connect and then sets domid and handle appropriately. + * If one or more frontends are already connected, this will set domid and handle to one + * of them arbitrarily. The main use for this function is to wait until a single + * frontend connection has occurred. + * returns 0 on success, non-zero on failure */ +int tpmback_wait_for_frontend_connect(domid_t *domid, unsigned int *handle); + +/* returns the number of frontends connected */ +int tpmback_num_frontends(void); + +/* Returns the uuid of the specified frontend, NULL on error.
+ * The return value is internally allocated, so don't free it */ +unsigned char* tpmback_get_uuid(domid_t domid, unsigned int handle); + +/* Get and set the opaque pointer for a tpmback instance */ +void* tpmback_get_opaque(domid_t domid, unsigned int handle); +/* Returns zero if successful, nonzero on failure (no such frontend) */ +int tpmback_set_opaque(domid_t domid, unsigned int handle, void* opaque); + +/* Get the XSM context of the given domain (using the tpmback event channel) */ +int tpmback_get_peercontext(domid_t domid, unsigned int handle, void* buffer, int buflen); +#endif diff -Nru xen-4.6.0/extras/mini-os/include/tpmfront.h xen-4.6.5/extras/mini-os/include/tpmfront.h --- xen-4.6.0/extras/mini-os/include/tpmfront.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/tpmfront.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/char/tpm_vtpm.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2006 IBM Corporation + * + * This code has also been derived from drivers/char/tpm_xen.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netfront/netfront.c + * from the linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ +#ifndef TPMFRONT_H +#define TPMFRONT_H + +#include +#include +#include +#include +#include +#include +#include + +struct tpmfront_dev { + grant_ref_t ring_ref; + evtchn_port_t evtchn; + + tpmif_shared_page_t *page; + + domid_t bedomid; + char* nodename; + char* bepath; + + XenbusState state; + + uint8_t waiting; + struct wait_queue_head waitq; + + uint8_t* respbuf; + size_t resplen; + +#ifdef HAVE_LIBC + int fd; +#endif + +}; + + +/*Initialize frontend */ +struct tpmfront_dev* init_tpmfront(const char* nodename); +/*Shutdown frontend */ +void shutdown_tpmfront(struct tpmfront_dev* dev); + +/* Send a tpm command to the backend and wait for the response + * + * @dev - frontend device + * @req - request buffer + * @reqlen - length of request buffer + * @resp - *resp will be set to internal response buffer, don't free it! Value is undefined on error + * @resplen - *resplen will be set to the length of the response. Value is undefined on error + * + * returns 0 on success, non zero on failure. + * */ +int tpmfront_cmd(struct tpmfront_dev* dev, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen); + +/* Set the locality used for communicating with a vTPM */ +int tpmfront_set_locality(struct tpmfront_dev* dev, int locality); + +#ifdef HAVE_LIBC +#include +/* POSIX IO functions: + * use tpmfront_open() to get a file descriptor to the tpm device + * use write() on the fd to send a command to the backend. You must + * include the entire command in a single call to write(). + * use read() on the fd to read the response. You can use + * fstat() to get the size of the response and lseek() to seek on it. 
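+ * + * Illustrative flow (a sketch; cmd, cmdlen and resp are hypothetical + * buffers, error handling omitted): + * + *     int fd = tpmfront_open(dev); + *     write(fd, cmd, cmdlen);     -- the entire command in one write() + *     struct stat st; + *     fstat(fd, &st);             -- st.st_size is the response length + *     read(fd, resp, st.st_size);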
+ */ +int tpmfront_open(struct tpmfront_dev* dev); +int tpmfront_posix_read(int fd, uint8_t* buf, size_t count); +int tpmfront_posix_write(int fd, const uint8_t* buf, size_t count); +int tpmfront_posix_fstat(int fd, struct stat* buf); +#endif + + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/tpm_tis.h xen-4.6.5/extras/mini-os/include/tpm_tis.h --- xen-4.6.0/extras/mini-os/include/tpm_tis.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/tpm_tis.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/char/tpm.c + * from the linux kernel + * + * Copyright (C) 2004 IBM Corporation + * + * This code has also been derived from drivers/char/tpm/tpm_tis.c + * from the linux kernel + * + * Copyright (C) 2005, 2006 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#ifndef TPM_TIS_H +#define TPM_TIS_H + +#include +#include + +#define TPM_TIS_EN_LOCL0 1 +#define TPM_TIS_EN_LOCL1 (1 << 1) +#define TPM_TIS_EN_LOCL2 (1 << 2) +#define TPM_TIS_EN_LOCL3 (1 << 3) +#define TPM_TIS_EN_LOCL4 (1 << 4) +#define TPM_TIS_EN_LOCLALL (TPM_TIS_EN_LOCL0 | TPM_TIS_EN_LOCL1 | TPM_TIS_EN_LOCL2 | TPM_TIS_EN_LOCL3 | TPM_TIS_EN_LOCL4) +#define TPM_TIS_LOCL_INT_TO_FLAG(x) (1 << x) +#define TPM_BASEADDR 0xFED40000 +#define TPM_PROBE_IRQ 0xFFFF + +struct tpm_chip; + +struct tpm_chip* init_tpm_tis(unsigned long baseaddr, int localities, unsigned int irq); +struct tpm_chip* init_tpm2_tis(unsigned long baseaddr, int localities, unsigned int irq); +void shutdown_tpm_tis(struct tpm_chip* tpm); + +int tpm_tis_request_locality(struct tpm_chip* tpm, int locality); +int tpm_tis_cmd(struct tpm_chip* tpm, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen); + +#ifdef HAVE_LIBC +#include +#include +/* POSIX IO functions: + * use tpm_tis_open() to get a file descriptor to the tpm device + * use write() on the fd to send a command to the backend. You must + * include the entire command in a single call to write(). + * use read() on the fd to read the response. You can use + * fstat() to get the size of the response and lseek() to seek on it. 
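+ * + * Illustrative flow (a sketch; error handling omitted): + * + *     struct tpm_chip* tpm = init_tpm_tis(TPM_BASEADDR, TPM_TIS_EN_LOCL0, TPM_PROBE_IRQ); + *     int fd = tpm_tis_open(tpm); + * + * after which write(), fstat() and read() are used exactly as described + * above.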
+ */ +int tpm_tis_open(struct tpm_chip* tpm); +int tpm_tis_posix_read(int fd, uint8_t* buf, size_t count); +int tpm_tis_posix_write(int fd, const uint8_t* buf, size_t count); +int tpm_tis_posix_fstat(int fd, struct stat* buf); +#endif + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/types.h xen-4.6.5/extras/mini-os/include/types.h --- xen-4.6.0/extras/mini-os/include/types.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/types.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,74 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: types.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: May 2003 + * + * Environment: Xen Minimal OS + * Description: a random collection of type definitions + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _TYPES_H_ +#define _TYPES_H_ +#include + +/* FreeBSD compat types */ +#ifndef HAVE_LIBC +typedef unsigned char u_char; +typedef unsigned int u_int; +typedef unsigned long u_long; +#endif +#if defined(__i386__) || defined(__arm__) +typedef long long quad_t; +typedef unsigned long long u_quad_t; +#elif defined(__x86_64__) +typedef long quad_t; +typedef unsigned long u_quad_t; +#endif /* __i386__ || __x86_64__ */ + +#ifdef HAVE_LIBC +#include +#include +#else +#if defined(__i386__) || defined(__arm__) +typedef unsigned int uintptr_t; +typedef int intptr_t; +#elif defined(__x86_64__) || defined(__aarch64__) +typedef unsigned long uintptr_t; +typedef long intptr_t; +#endif /* __i386__ || __x86_64__ */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +#if defined(__i386__) || defined(__arm__) +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#elif defined(__x86_64__) || defined(__aarch64__) +typedef signed long int64_t; +typedef unsigned long uint64_t; +#endif +typedef uint64_t uintmax_t; +typedef int64_t intmax_t; +typedef int64_t off_t; +#endif + +typedef intptr_t ptrdiff_t; + + +#ifndef HAVE_LIBC +typedef long ssize_t; +#endif + +#endif /* _TYPES_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/wait.h xen-4.6.5/extras/mini-os/include/wait.h --- xen-4.6.0/extras/mini-os/include/wait.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/wait.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,105 @@ +#ifndef __WAIT_H__ +#define __WAIT_H__ + +#include +#include +#include + +#define DEFINE_WAIT(name) \ +struct wait_queue name = { \ + .thread = get_current(), \ + .waiting = 0, \ +} + + +static inline void init_waitqueue_head(struct wait_queue_head *h) +{ + MINIOS_STAILQ_INIT(h); +} + +static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread) +{ + q->thread = thread; + q->waiting = 0; +} + +static inline void add_wait_queue(struct wait_queue_head *h, struct wait_queue *q) +{ + if (!q->waiting) { + MINIOS_STAILQ_INSERT_HEAD(h, q, thread_list); + q->waiting = 1; + } +} + +static inline void remove_wait_queue(struct wait_queue_head *h, struct wait_queue *q) +{ + if (q->waiting) { + 
MINIOS_STAILQ_REMOVE(h, q, struct wait_queue, thread_list); + q->waiting = 0; + } +} + +static inline void wake_up(struct wait_queue_head *head) +{ + unsigned long flags; + struct wait_queue *curr, *tmp; + local_irq_save(flags); + MINIOS_STAILQ_FOREACH_SAFE(curr, head, thread_list, tmp) + wake(curr->thread); + local_irq_restore(flags); +} + +#define add_waiter(w, wq) do { \ + unsigned long flags; \ + local_irq_save(flags); \ + add_wait_queue(&wq, &w); \ + block(get_current()); \ + local_irq_restore(flags); \ +} while (0) + +#define remove_waiter(w, wq) do { \ + unsigned long flags; \ + local_irq_save(flags); \ + remove_wait_queue(&wq, &w); \ + local_irq_restore(flags); \ +} while (0) + +#define wait_event_deadline(wq, condition, deadline) do { \ + unsigned long flags; \ + DEFINE_WAIT(__wait); \ + if(condition) \ + break; \ + for(;;) \ + { \ + /* protect the list */ \ + local_irq_save(flags); \ + add_wait_queue(&wq, &__wait); \ + get_current()->wakeup_time = deadline; \ + clear_runnable(get_current()); \ + local_irq_restore(flags); \ + if((condition) || (deadline && NOW() >= deadline)) \ + break; \ + schedule(); \ + } \ + local_irq_save(flags); \ + /* need to wake up */ \ + wake(get_current()); \ + remove_wait_queue(&wq, &__wait); \ + local_irq_restore(flags); \ +} while(0) + +#define wait_event(wq, condition) wait_event_deadline(wq, condition, 0) + + + +#endif /* __WAIT_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/waittypes.h xen-4.6.5/extras/mini-os/include/waittypes.h --- xen-4.6.0/extras/mini-os/include/waittypes.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/waittypes.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,32 @@ +#ifndef __WAITTYPE_H__ +#define __WAITTYPE_H__ + +#include + +struct thread; +struct wait_queue +{ + int waiting; + struct thread *thread; + MINIOS_STAILQ_ENTRY(struct wait_queue) thread_list; +}; + +/* TODO - lock required? 
*/ +MINIOS_STAILQ_HEAD(wait_queue_head, struct wait_queue); + +#define DECLARE_WAIT_QUEUE_HEAD(name) \ + struct wait_queue_head name = MINIOS_STAILQ_HEAD_INITIALIZER(name) + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) MINIOS_STAILQ_HEAD_INITIALIZER(name) + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/arch_endian.h xen-4.6.5/extras/mini-os/include/x86/arch_endian.h --- xen-4.6.0/extras/mini-os/include/x86/arch_endian.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/arch_endian.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef ARCH_ENDIAN_H +#error "Do not include arch_endian by itself, include endian.h" +#else + +#define __BYTE_ORDER __LITTLE_ENDIAN + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/x86/arch_limits.h xen-4.6.5/extras/mini-os/include/x86/arch_limits.h --- xen-4.6.0/extras/mini-os/include/x86/arch_limits.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/arch_limits.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,20 @@ + +#ifndef __ARCH_LIMITS_H__ +#define __ARCH_LIMITS_H__ + +#define __PAGE_SHIFT 12 + +#ifdef __ASSEMBLY__ +#define __PAGE_SIZE (1 << __PAGE_SHIFT) +#else +#ifdef __x86_64__ +#define __PAGE_SIZE (1UL << __PAGE_SHIFT) +#else +#define __PAGE_SIZE (1ULL << __PAGE_SHIFT) +#endif +#endif + +#define __STACK_SIZE_PAGE_ORDER 4 +#define __STACK_SIZE (__PAGE_SIZE * (1 << __STACK_SIZE_PAGE_ORDER)) + +#endif /* __ARCH_LIMITS_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/arch_mm.h xen-4.6.5/extras/mini-os/include/x86/arch_mm.h --- xen-4.6.0/extras/mini-os/include/x86/arch_mm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/arch_mm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,233 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +#ifndef __ASSEMBLY__ +#include +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#else +#error "Unsupported architecture" +#endif +#endif + +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 + +#define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +/* + * If starting from virtual address greater than 0xc0000000, + * this value will be 2 to account for final mid-level page + * directory which is always mapped in at this location. + */ +#define NOT_L1_FRAMES 3 +#define PRIpte "016llx" +#ifndef __ASSEMBLY__ +typedef uint64_t pgentry_t; +#endif + +#elif defined(__x86_64__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +/* These are page-table limitations. Current CPUs support only 40-bit phys. */ +#define PADDR_BITS 52 +#define VADDR_BITS 48 +#define PADDR_MASK ((1UL << PADDR_BITS)-1) +#define VADDR_MASK ((1UL << VADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) + +#define NOT_L1_FRAMES 3 +#define PRIpte "016lx" +#ifndef __ASSEMBLY__ +typedef unsigned long pgentry_t; +#endif + +#endif + +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#if defined(__x86_64__) +#define l4_table_offset(_a) \ + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif + +#define _PAGE_PRESENT 0x001ULL +#define _PAGE_RW 0x002ULL +#define _PAGE_USER 0x004ULL +#define _PAGE_PWT 0x008ULL +#define _PAGE_PCD 0x010ULL +#define _PAGE_ACCESSED 0x020ULL +#define _PAGE_DIRTY 0x040ULL +#define _PAGE_PAT 0x080ULL +#define _PAGE_PSE 0x080ULL +#define _PAGE_GLOBAL 0x100ULL + +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT) +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif /* __i386__ || __x86_64__ */ + +/* flags for ioremap */ +#define IO_PROT (L1_PROT) +#define IO_PROT_NOCACHE (L1_PROT | _PAGE_PCD) + +/* for P2M */ +#define INVALID_P2M_ENTRY (~0UL) + +#include "arch_limits.h" +#define PAGE_SIZE __PAGE_SIZE +#define PAGE_SHIFT __PAGE_SHIFT +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) 
((uint64_t)(x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) ((x) >> L1_PAGETABLE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +#ifndef __ASSEMBLY__ +/* Definitions for machine and pseudophysical addresses. */ +#ifdef __i386__ +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + +extern unsigned long *phys_to_machine_mapping; +extern char _text, _etext, _erodata, _edata, _end; +extern unsigned long mfn_zero; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} +#endif + +#define VIRT_START ((unsigned long)&_text) + +#define to_phys(x) ((unsigned long)(x)-VIRT_START) +#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) + +/* Pagetable walking. */ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) + + +#define PT_BASE ((pgentry_t *)start_info.pt_base) + +#ifdef __x86_64__ +#define virtual_to_l3(_virt) ((pgentry_t *)pte_to_virt(PT_BASE[l4_table_offset(_virt)])) +#else +#define virtual_to_l3(_virt) PT_BASE +#endif + +#define virtual_to_l2(_virt) ({ \ + unsigned long __virt2 = (_virt); \ + (pgentry_t *) pte_to_virt(virtual_to_l3(__virt2)[l3_table_offset(__virt2)]); \ +}) + +#define virtual_to_l1(_virt) ({ \ + unsigned long __virt1 = (_virt); \ + (pgentry_t *) pte_to_virt(virtual_to_l2(__virt1)[l2_table_offset(__virt1)]); \ +}) + +#define virtual_to_pte(_virt) ({ \ + unsigned long __virt0 = (unsigned long) (_virt); \ + virtual_to_l1(__virt0)[l1_table_offset(__virt0)]; \ +}) +#define virtual_to_mfn(_virt) pte_to_mfn(virtual_to_pte(_virt)) + +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT) +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO) +#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO) + +pgentry_t *need_pgt(unsigned long addr); + +#endif /* _ARCH_MM_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/arch_sched.h xen-4.6.5/extras/mini-os/include/x86/arch_sched.h --- xen-4.6.0/extras/mini-os/include/x86/arch_sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/arch_sched.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,25 @@ + +#ifndef __ARCH_SCHED_H__ +#define __ARCH_SCHED_H__ + +#include "arch_limits.h" + +static inline struct thread* get_current(void) +{ + struct thread **current; +#ifdef __i386__ + register unsigned long sp asm("esp"); +#else + register unsigned long sp asm("rsp"); +#endif + current = (void *)(unsigned long)(sp & 
~(__STACK_SIZE-1)); + return *current; +} + +extern void __arch_switch_threads(unsigned long *prevctx, unsigned long *nextctx); + +#define arch_switch_threads(prev,next) __arch_switch_threads(&(prev)->sp, &(next)->sp) + + + +#endif /* __ARCH_SCHED_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/arch_spinlock.h xen-4.6.5/extras/mini-os/include/x86/arch_spinlock.h --- xen-4.6.0/extras/mini-os/include/x86/arch_spinlock.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/arch_spinlock.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,94 @@ + + +#ifndef __ARCH_ASM_SPINLOCK_H +#define __ARCH_ASM_SPINLOCK_H + +#include +#include "os.h" + + +#define ARCH_SPIN_LOCK_UNLOCKED { 1 } + +/* + * Simple spin lock operations. There are two variants, one clears IRQs + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define arch_spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\n\t" + +#define spin_lock_string_flags \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ + "jz 3f\n\t" \ + "#sti\n\t" \ + "3:\t" \ + "rep;nop\n\t" \ + "cmpb $0, %0\n\t" \ + "jle 3b\n\t" \ + "#cli\n\t" \ + "jmp 1b\n" \ + "4:\n\t" + +/* + * This works. Despite all the confusion. + * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->slock) \ + :"0" (oldval) : "memory" + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + char oldval = ARCH_SPIN_LOCK_UNLOCKED; + __asm__ __volatile__( + spin_unlock_string + ); +} + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->slock) : : "memory"); +} + +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) +{ + __asm__ __volatile__( + spin_lock_string_flags + :"=m" (lock->slock) : "r" (flags) : "memory"); +} + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/x86/os.h xen-4.6.5/extras/mini-os/include/x86/os.h --- xen-4.6.0/extras/mini-os/include/x86/os.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/os.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,572 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definitions + */ + +#ifndef _OS_H_ +#define _OS_H_ + +#define smp_processor_id() 0 + + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include + +#define USED __attribute__ ((used)) + +#define BUG do_exit + +#endif +#include + + + +#define __KERNEL_CS FLAT_KERNEL_CS +#define __KERNEL_DS FLAT_KERNEL_DS +#define __KERNEL_SS FLAT_KERNEL_SS + +#define TRAP_divide_error 0 +#define TRAP_debug 1 +#define TRAP_nmi 2 +#define TRAP_int3 3 +#define TRAP_overflow 4 +#define TRAP_bounds 5 +#define TRAP_invalid_op 6 +#define TRAP_no_device 7 +#define TRAP_double_fault 8 +#define TRAP_copro_seg 9 +#define TRAP_invalid_tss 10 +#define TRAP_no_segment 11 +#define TRAP_stack_error 12 +#define
TRAP_gp_fault 13 +#define TRAP_page_fault 14 +#define TRAP_spurious_int 15 +#define TRAP_copro_error 16 +#define TRAP_alignment_check 17 +#define TRAP_machine_check 18 +#define TRAP_simd_error 19 +#define TRAP_deferred_nmi 31 + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +extern shared_info_t *HYPERVISOR_shared_info; + +void trap_init(void); +void trap_fini(void); + +void arch_fini(void); + + + + + +/* + * The use of 'barrier' in the following reflects their use as local-lock + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following + * critical operations are executed. All critical operations must complete + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also + * includes these barriers, for example. + */ + +#define __cli() \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + _vcpu->evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __save_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + (x) = _vcpu->evtchn_upcall_mask; \ +} while (0) + +#define __restore_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + }\ +} while (0) + +#define safe_halt() ((void)0) + +#define __save_and_cli(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ + (x) = _vcpu->evtchn_upcall_mask; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_restore(x) __restore_flags(x) +#define local_save_flags(x) __save_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#define irqs_disabled() \ + HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define wmb() __asm__ __volatile__ ("": : :"memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ("mfence":::"memory") +#define rmb() __asm__ __volatile__ ("lfence":::"memory") +#define wmb() __asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */ +#endif + + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. 
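+ * + * (This is why the __xchg()/__xg() helpers below cast the pointer to a + * wide dummy type: the "m" operand then names the object itself, so gcc + * cannot substitute a cached or aliased copy.)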
+ */ +typedef struct { volatile int counter; } atomic_t; + + +/************************** i386 *******************************/ +#ifdef __INSIDE_MINIOS__ +#if defined (__i386__) + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It can be reordered on architectures other than x86. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0; +} + +static inline int variable_test_bit(int nr, const volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non-x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic.
All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ +#define ADDR (*(volatile long *) addr) + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + + +#elif defined(__x86_64__)/* ifdef __i386__ */ +/************************** x86_64 *******************************/ + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +#define __xg(x) ((volatile long *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %k0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 8: + __asm__ __volatile__("xchgq %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile const void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"dIr" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"dIr" (nr) : "memory"); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first.
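+ * + * For example, __ffs(0x10) is 4 and __ffs(1) is 0; bit numbering starts + * at the least significant bit.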
+ */ +static __inline__ unsigned long __ffs(unsigned long word) +{ + __asm__("bsfq %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + +#define ADDR (*(volatile long *) addr) + +#define rdtscll(val) do { \ + unsigned int __a,__d; \ + asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ + (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ +} while(0) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define wrmsrl(msr,val) wrmsr(msr,(uint32_t)((uint64_t)(val)),((uint64_t)(val))>>32) + + +#else /* ifdef __x86_64__ */ +#error "Unsupported architecture" +#endif +#endif /* ifdef __INSIDE_MINIOS */ + +/********************* common i386 and x86_64 ****************************/ +struct __synch_xchg_dummy { unsigned long a[100]; }; +#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x)) + +#define synch_cmpxchg(ptr, old, new) \ +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ + (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +static inline unsigned long __synch_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#ifdef __x86_64__ + case 4: + __asm__ __volatile__("lock; cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#else + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#endif + } + return old; +} + + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +static inline int +HYPERVISOR_xsm_op( + struct xen_flask_op *op) +{ + return _hypercall1(int, xsm_op, op); +} + +#undef ADDR + +#endif /* not assembly */ +#endif /* _OS_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/traps.h xen-4.6.5/extras/mini-os/include/x86/traps.h --- xen-4.6.0/extras/mini-os/include/x86/traps.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/traps.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,78 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: traps.h + * Author: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jun 2005 + * + * Environment: Xen Minimal OS + * Description: Deals with traps + * + **************************************************************************** + */ + +#ifndef _TRAPS_H_ +#define _TRAPS_H_ + +#ifdef __i386__ +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; +#elif __x86_64__ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non interrupts/non tracing syscalls only save up to here */ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ +}; + + +#endif + +void dump_regs(struct pt_regs *regs); +void stack_walk(void); + +#define TRAP_PF_PROT 0x1 +#define TRAP_PF_WRITE 0x2 +#define TRAP_PF_USER 0x4 + +#endif /* _TRAPS_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/x86_32/arch_wordsize.h xen-4.6.5/extras/mini-os/include/x86/x86_32/arch_wordsize.h --- xen-4.6.0/extras/mini-os/include/x86/x86_32/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/x86_32/arch_wordsize.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1 @@ +#define __WORDSIZE 32 diff -Nru xen-4.6.0/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h xen-4.6.5/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- xen-4.6.0/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,337 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux.
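+ * + * The wrappers below enter Xen through the guest's hypercall transfer + * page: hypercall number N has a 32-byte stub at hypercall_page + N * 32, + * and up to five arguments are passed in ebx, ecx, edx, esi and edi.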
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include +#include +#include +#include + +typedef struct { unsigned long pte_low, pte_high; } pte_t; + +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ 
+}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown shutdown = { .reason = reason }; + return _hypercall2(int, sched_op, SCHEDOP_shutdown, &shutdown); +} + +static inline long +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + uint64_t ma, uint64_t desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, new_val.pte_high, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + int cmd, void *physdev_op) +{ + return _hypercall2(int, physdev_op, cmd, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall5(int, 
update_va_mapping_otherdomain, va, + new_val.pte_low, new_val.pte_high, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/x86/x86_64/arch_wordsize.h xen-4.6.5/extras/mini-os/include/x86/x86_64/arch_wordsize.h --- xen-4.6.0/extras/mini-os/include/x86/x86_64/arch_wordsize.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/x86_64/arch_wordsize.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,2 @@ +#define __WORDSIZE 64 +#define __WORDSIZE_COMPAT32 1 diff -Nru xen-4.6.0/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h xen-4.6.5/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- xen-4.6.0/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,344 @@ +/****************************************************************************** + * hypercall-x86_64.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_X86_64_H__ +#define __HYPERCALL_X86_64_H__ + +#include +#include +#include + +typedef struct { unsigned long pte; } pte_t; + +#define __pte(x) ((pte_t) { (x) } ) + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)) \ + : "memory", "r10" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; movq %8,%%r8; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)), \ + "g" ((long)(a5)) \ + : "memory", "r10", "r8" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + 
struct sched_shutdown shutdown = { .reason = reason }; + return _hypercall2(int, sched_op, SCHEDOP_shutdown, &shutdown); +} + +static inline long +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + return _hypercall1(long, set_timer_op, timeout); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + int cmd, void *physdev_op) +{ + return _hypercall2(int, physdev_op, cmd, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_set_segment_base( + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); +} + +#endif /* __HYPERCALL_X86_64_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-arm/hvm/save.h xen-4.6.5/extras/mini-os/include/xen/arch-arm/hvm/save.h --- xen-4.6.0/extras/mini-os/include/xen/arch-arm/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-arm/hvm/save.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,39 @@ +/* + * Structure definitions for HVM state 
that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2012 Citrix Systems Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__ +#define __XEN_PUBLIC_HVM_SAVE_ARM_H__ + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-arm.h xen-4.6.5/extras/mini-os/include/xen/arch-arm.h --- xen-4.6.0/extras/mini-os/include/xen/arch-arm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-arm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,432 @@ +/****************************************************************************** + * arch-arm.h + * + * Guest OS interface to ARM Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright 2011 (C) Citrix Systems + */ + +#ifndef __XEN_PUBLIC_ARCH_ARM_H__ +#define __XEN_PUBLIC_ARCH_ARM_H__ + +/* + * `incontents 50 arm_abi Hypercall Calling Convention + * + * A hypercall is issued using the ARM HVC instruction. + * + * A hypercall can take up to 5 arguments. 
These are passed in
+ * registers, the first argument in x0/r0 (for arm64/arm32 guests
+ * respectively irrespective of whether the underlying hypervisor is
+ * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2,
+ * the fourth in x3/r3 and the fifth in x4/r4.
+ *
+ * The hypercall number is passed in r12 (arm) or x16 (arm64). In both
+ * cases the relevant ARM procedure calling convention specifies this
+ * is an inter-procedure-call scratch register (e.g. for use in linker
+ * stubs). This use does not conflict with use during a hypercall.
+ *
+ * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG.
+ *
+ * The return value is in x0/r0.
+ *
+ * The hypercall will clobber x16/r12 and the argument registers used
+ * by that hypercall (except r0 which is the return value) i.e. in
+ * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a
+ * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA
+ * EABI) and Procedure Call Standard for the ARM 64-bit Architecture
+ * (AAPCS64). Where there is a conflict the 64-bit standard should be
+ * used regardless of guest type. Structures which are passed as
+ * hypercall arguments are always little endian.
+ *
+ * All memory which is shared with other entities in the system
+ * (including the hypervisor and other guests) must reside in memory
+ * which is mapped as Normal Inner-cacheable. This applies to:
+ *   - hypercall arguments passed via a pointer to guest memory.
+ *   - memory shared via the grant table mechanism (including PV I/O
+ *     rings etc).
+ *   - memory shared with the hypervisor (struct shared_info, struct
+ *     vcpu_info, the grant table, etc).
+ *
+ * Any Inner cache allocation strategy (Write-Back, Write-Through etc)
+ * is acceptable. There is no restriction on the Outer-cacheability.
+ */
+
+/*
+ * `incontents 55 arm_hcall Supported Hypercalls
+ *
+ * Xen on ARM makes extensive use of hardware facilities and therefore
+ * only a subset of the potential hypercalls are required.
+ *
+ * Since ARM uses second stage paging any machine/physical addresses
+ * passed to hypercalls are Guest Physical Addresses (Intermediate
+ * Physical Addresses) unless otherwise noted.
+ *
+ * The following hypercalls (and sub operations) are supported on the
+ * ARM platform. Other hypercalls should be considered
+ * unavailable/unsupported.
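
Before the comment resumes with the per-hypercall list below, the convention just described can be illustrated with a minimal sketch for an arm64 guest. It assumes __HYPERVISOR_console_io from the public headers is in scope; the #0xEA1 immediate is XEN_HYPERCALL_TAG, carried in the HVC ISS as required. Real guests issue hypercalls through per-architecture wrapper macros, not open-coded asm like this:

static inline long xen_console_io_sketch(long cmd, long count, char *str)
{
    register long x0 asm("x0") = cmd;        /* argument 1; also the return value */
    register long x1 asm("x1") = count;      /* argument 2, clobbered by Xen */
    register long x2 asm("x2") = (long)str;  /* argument 3, clobbered by Xen */
    register long x16 asm("x16") = __HYPERVISOR_console_io; /* hypercall number */

    asm volatile ( "hvc #0xEA1"              /* HVC ISS carries XEN_HYPERCALL_TAG */
                   : "+r" (x0), "+r" (x1), "+r" (x2), "+r" (x16)
                   :
                   : "memory" );
    return x0;                               /* result comes back in x0 */
}
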
+ *
+ * HYPERVISOR_memory_op
+ *   All generic sub-operations
+ *
+ * HYPERVISOR_domctl
+ *   All generic sub-operations, with the exception of:
+ *    * XEN_DOMCTL_irq_permission (not yet implemented)
+ *
+ * HYPERVISOR_sched_op
+ *   All generic sub-operations, with the exception of:
+ *    * SCHEDOP_block -- prefer wfi hardware instruction
+ *
+ * HYPERVISOR_console_io
+ *   All generic sub-operations
+ *
+ * HYPERVISOR_xen_version
+ *   All generic sub-operations
+ *
+ * HYPERVISOR_event_channel_op
+ *   All generic sub-operations
+ *
+ * HYPERVISOR_physdev_op
+ *   No sub-operations are currently supported
+ *
+ * HYPERVISOR_sysctl
+ *   All generic sub-operations, with the exception of:
+ *    * XEN_SYSCTL_page_offline_op
+ *    * XEN_SYSCTL_get_pmstat
+ *    * XEN_SYSCTL_pm_op
+ *
+ * HYPERVISOR_hvm_op
+ *   Exactly these sub-operations are supported:
+ *    * HVMOP_set_param
+ *    * HVMOP_get_param
+ *
+ * HYPERVISOR_grant_table_op
+ *   All generic sub-operations
+ *
+ * HYPERVISOR_vcpu_op
+ *   Exactly these sub-operations are supported:
+ *    * VCPUOP_register_vcpu_info
+ *    * VCPUOP_register_runstate_memory_area
+ *
+ *
+ * Other notes on the ARM ABI:
+ *
+ * - struct start_info is not exported to ARM guests.
+ *
+ * - struct shared_info is mapped by ARM guests using the
+ *   HYPERVISOR_memory_op sub-op XENMEM_add_to_physmap, passing
+ *   XENMAPSPACE_shared_info as space parameter.
+ *
+ * - All the per-cpu struct vcpu_info are mapped by ARM guests using the
+ *   HYPERVISOR_vcpu_op sub-op VCPUOP_register_vcpu_info, including cpu0
+ *   struct vcpu_info.
+ *
+ * - The grant table is mapped using the HYPERVISOR_memory_op sub-op
+ *   XENMEM_add_to_physmap, passing XENMAPSPACE_grant_table as space
+ *   parameter. The memory range specified under the Xen compatible
+ *   hypervisor node on device tree can be used as target gpfn for the
+ *   mapping.
+ *
+ * - Xenstore is initialized by using the two hvm_params
+ *   HVM_PARAM_STORE_PFN and HVM_PARAM_STORE_EVTCHN. They can be read
+ *   with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
+ *
+ * - The paravirtualized console is initialized by using the two
+ *   hvm_params HVM_PARAM_CONSOLE_PFN and HVM_PARAM_CONSOLE_EVTCHN. They
+ *   can be read with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
+ *
+ * - Event channel notifications are delivered using the percpu GIC
+ *   interrupt specified under the Xen compatible hypervisor node on
+ *   device tree.
+ *
+ * - The device tree Xen compatible node is fully described under Linux
+ *   at Documentation/devicetree/bindings/arm/xen.txt.
+ */
+
+#define XEN_HYPERCALL_TAG 0XEA1
+
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+
+#ifndef __ASSEMBLY__
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef union { type *p; unsigned long q; } \
+        __guest_handle_ ## name; \
+    typedef union { type *p; uint64_aligned_t q; } \
+        __guest_handle_64_ ## name;
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory. On ARM it is always 8 bytes in size and
+ * 8-byte aligned.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64.
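
The distinction drawn just above is easiest to see in use. A hedged sketch, assuming the handle macros defined immediately below plus a DEFINE_XEN_GUEST_HANDLE(uint64_t) instantiation as provided by the generic xen.h; struct foo_args and fill_foo_args are hypothetical, not part of the ABI. Note that set_xen_guest_handle_raw (below) zeroes the full 64-bit word before storing the pointer, so on aarch32 the upper half of an in-memory handle never holds stale bits:

struct foo_args {
    XEN_GUEST_HANDLE(uint64_t) buf;  /* in-memory form: 8 bytes, 8-byte aligned */
    uint32_t len;
};

static void fill_foo_args(struct foo_args *args, uint64_t *p, uint32_t n)
{
    set_xen_guest_handle(args->buf, p);  /* zeroes .q, then stores .p */
    args->len = n;
}
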
+ */ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name +#define set_xen_guest_handle_raw(hnd, val) \ + do { \ + typeof(&(hnd)) _sxghr_tmp = &(hnd); \ + _sxghr_tmp->q = 0; \ + _sxghr_tmp->p = val; \ + } while ( 0 ) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */ +# define __DECL_REG(n64, n32) union { \ + uint64_t n64; \ + uint32_t n32; \ + } +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */ +#define __DECL_REG(n64, n32) uint64_t n64 +#endif + +struct vcpu_guest_core_regs +{ + /* Aarch64 Aarch32 */ + __DECL_REG(x0, r0_usr); + __DECL_REG(x1, r1_usr); + __DECL_REG(x2, r2_usr); + __DECL_REG(x3, r3_usr); + __DECL_REG(x4, r4_usr); + __DECL_REG(x5, r5_usr); + __DECL_REG(x6, r6_usr); + __DECL_REG(x7, r7_usr); + __DECL_REG(x8, r8_usr); + __DECL_REG(x9, r9_usr); + __DECL_REG(x10, r10_usr); + __DECL_REG(x11, r11_usr); + __DECL_REG(x12, r12_usr); + + __DECL_REG(x13, sp_usr); + __DECL_REG(x14, lr_usr); + + __DECL_REG(x15, __unused_sp_hyp); + + __DECL_REG(x16, lr_irq); + __DECL_REG(x17, sp_irq); + + __DECL_REG(x18, lr_svc); + __DECL_REG(x19, sp_svc); + + __DECL_REG(x20, lr_abt); + __DECL_REG(x21, sp_abt); + + __DECL_REG(x22, lr_und); + __DECL_REG(x23, sp_und); + + __DECL_REG(x24, r8_fiq); + __DECL_REG(x25, r9_fiq); + __DECL_REG(x26, r10_fiq); + __DECL_REG(x27, r11_fiq); + __DECL_REG(x28, r12_fiq); + + __DECL_REG(x29, sp_fiq); + __DECL_REG(x30, lr_fiq); + + /* Return address and mode */ + __DECL_REG(pc64, pc32); /* ELR_EL2 */ + uint32_t cpsr; /* SPSR_EL2 */ + + union { + uint32_t spsr_el1; /* AArch64 */ + uint32_t spsr_svc; /* AArch32 */ + }; + + /* AArch32 guests only */ + uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt; + + /* AArch64 guests only */ + uint64_t sp_el0; + uint64_t sp_el1, elr_el1; +}; +typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t); + +#undef __DECL_REG + +typedef uint64_t xen_pfn_t; +#define PRI_xen_pfn PRIx64 + +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +/* Only one. 
All other VCPUS must use VCPUOP_register_vcpu_info */ +#define XEN_LEGACY_MAX_VCPUS 1 + +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong PRIx64 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +struct vcpu_guest_context { +#define _VGCF_online 0 +#define VGCF_online (1<<_VGCF_online) + uint32_t flags; /* VGCF_* */ + + struct vcpu_guest_core_regs user_regs; /* Core CPU registers */ + + uint32_t sctlr; + uint64_t ttbcr, ttbr0, ttbr1; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); +#endif + +struct arch_vcpu_info { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct arch_shared_info { +}; +typedef struct arch_shared_info arch_shared_info_t; +typedef uint64_t xen_callback_t; + +#endif + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* PSR bits (CPSR, SPSR) */ + +#define PSR_THUMB (1<<5) /* Thumb Mode enable */ +#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ +#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */ +#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */ +#define PSR_BIG_ENDIAN (1<<9) /* arm32: Big Endian Mode */ +#define PSR_DBG_MASK (1<<9) /* arm64: Debug Exception mask */ +#define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */ +#define PSR_JAZELLE (1<<24) /* Jazelle Mode */ + +/* 32 bit modes */ +#define PSR_MODE_USR 0x10 +#define PSR_MODE_FIQ 0x11 +#define PSR_MODE_IRQ 0x12 +#define PSR_MODE_SVC 0x13 +#define PSR_MODE_MON 0x16 +#define PSR_MODE_ABT 0x17 +#define PSR_MODE_HYP 0x1a +#define PSR_MODE_UND 0x1b +#define PSR_MODE_SYS 0x1f + +/* 64 bit modes */ +#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */ +#define PSR_MODE_EL3h 0x0d +#define PSR_MODE_EL3t 0x0c +#define PSR_MODE_EL2h 0x09 +#define PSR_MODE_EL2t 0x08 +#define PSR_MODE_EL1h 0x05 +#define PSR_MODE_EL1t 0x04 +#define PSR_MODE_EL0t 0x00 + +#define PSR_GUEST32_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) +#define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h) + +#define SCTLR_GUEST_INIT 0x00c50078 + +/* + * Virtual machine platform (memory layout, interrupts) + * + * These are defined for consistency between the tools and the + * hypervisor. Guests must not rely on these hardcoded values but + * should instead use the FDT. + */ + +/* Physical Address Space */ + +/* + * vGIC mappings: Only one set of mapping is used by the guest. + * Therefore they can overlap. + */ + +/* vGIC v2 mappings */ +#define GUEST_GICD_BASE 0x03001000ULL +#define GUEST_GICD_SIZE 0x00001000ULL +#define GUEST_GICC_BASE 0x03002000ULL +#define GUEST_GICC_SIZE 0x00000100ULL + +/* vGIC v3 mappings */ +#define GUEST_GICV3_GICD_BASE 0x03001000ULL +#define GUEST_GICV3_GICD_SIZE 0x00010000ULL + +#define GUEST_GICV3_RDIST_STRIDE 0x20000ULL +#define GUEST_GICV3_RDIST_REGIONS 1 + +#define GUEST_GICV3_GICR0_BASE 0x03020000ULL /* vCPU0 - vCPU7 */ +#define GUEST_GICV3_GICR0_SIZE 0x00100000ULL + +/* + * 16MB == 4096 pages reserved for guest to use as a region to map its + * grant table in. 
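
Stepping back to the toolstack-only vcpu_guest_context defined above: under the stated __XEN__/__XEN_TOOLS__ guards, a boot vCPU for a 64-bit guest would be prepared roughly as follows. This is a sketch only; init_boot_vcpu and entry are hypothetical names, and the pc64 field relies on the anonymous-union form of __DECL_REG:

static void init_boot_vcpu(struct vcpu_guest_context *ctx, uint64_t entry)
{
    *ctx = (struct vcpu_guest_context){ 0 };
    ctx->flags = VGCF_online;                /* vCPU is runnable immediately */
    ctx->user_regs.pc64 = entry;             /* guest entry point */
    ctx->user_regs.cpsr = PSR_GUEST64_INIT;  /* EL1h, ABT/FIQ/IRQ masked */
    ctx->sctlr = SCTLR_GUEST_INIT;           /* MMU and caches off at start */
}
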
+ */
+#define GUEST_GNTTAB_BASE 0x38000000ULL
+#define GUEST_GNTTAB_SIZE 0x01000000ULL
+
+#define GUEST_MAGIC_BASE 0x39000000ULL
+#define GUEST_MAGIC_SIZE 0x01000000ULL
+
+#define GUEST_RAM_BANKS 2
+
+#define GUEST_RAM0_BASE 0x40000000ULL /* 3GB of low RAM @ 1GB */
+#define GUEST_RAM0_SIZE 0xc0000000ULL
+
+#define GUEST_RAM1_BASE 0x0200000000ULL /* 1016GB of RAM @ 8GB */
+#define GUEST_RAM1_SIZE 0xfe00000000ULL
+
+#define GUEST_RAM_BASE GUEST_RAM0_BASE /* Lowest RAM address */
+/* Largest amount of actual RAM, not including holes */
+#define GUEST_RAM_MAX (GUEST_RAM0_SIZE + GUEST_RAM1_SIZE)
+/* Suitable for e.g. const uint64_t ramfoo[] = GUEST_RAM_BANK_FOOS; */
+#define GUEST_RAM_BANK_BASES { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
+#define GUEST_RAM_BANK_SIZES { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
+
+/* Interrupts */
+#define GUEST_TIMER_VIRT_PPI 27
+#define GUEST_TIMER_PHYS_S_PPI 29
+#define GUEST_TIMER_PHYS_NS_PPI 30
+#define GUEST_EVTCHN_PPI 31
+
+/* PSCI functions */
+#define PSCI_cpu_suspend 0
+#define PSCI_cpu_off 1
+#define PSCI_cpu_on 2
+#define PSCI_migrate 3
+
+#endif
+
+#endif /* __XEN_PUBLIC_ARCH_ARM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/cpuid.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/cpuid.h
--- xen-4.6.0/extras/mini-os/include/xen/arch-x86/cpuid.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/cpuid.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/*
+ * For compatibility with other hypervisor interfaces, the Xen cpuid leaves
+ * can be found at the first otherwise unused 0x100 aligned boundary starting
+ * from 0x40000000.
+ *
+ * e.g. If viridian extensions are enabled for an HVM domain, the Xen cpuid
+ * leaves will start at 0x40000100
+ */
+
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000x00)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000x01) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000x02) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) + */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ + +#define XEN_CPUID_MAX_NUM_LEAVES 4 + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/hvm/save.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/hvm/save.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/hvm/save.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,630 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ +#define __XEN_PUBLIC_HVM_SAVE_X86_H__ + +/* + * Save/restore header: general info about the save file. 
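
Before the save-file layout continues below, the cpuid.h definitions just finished deserve a worked sketch. This assumes GCC's __cpuid macro from <cpuid.h>; "XenVMMXenVMM" is the EBX/ECX/EDX signature given above, the 0x100 stride matches the viridian note, and the eax >= base + 2 test checks that the leaves up to the hypercall-page leaf exist:

#include <cpuid.h>
#include <stdint.h>
#include <string.h>

/* Sketch: return the Xen cpuid leaf base, or 0 if not running on Xen. */
static uint32_t xen_cpuid_base(void)
{
    uint32_t base, eax, sig[3];

    for (base = 0x40000000; base < 0x40010000; base += 0x100) {
        __cpuid(base, eax, sig[0], sig[1], sig[2]);
        if (memcmp(sig, "XenVMMXenVMM", 12) == 0 && eax >= base + 2)
            return base;  /* eax reports the largest supported Xen leaf */
    }
    return 0;
}
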
+ */ + +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 0x00000001 + +struct hvm_save_header { + uint32_t magic; /* Must be HVM_FILE_MAGIC */ + uint32_t version; /* File format version */ + uint64_t changeset; /* Version of Xen that saved this file */ + uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ + uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ +}; + +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); + + +/* + * Processor + * + * Compat: Pre-3.4 didn't have msr_tsc_aux + */ + +struct hvm_hw_cpu { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. 
*/ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + uint64_t msr_tsc_aux; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +struct hvm_hw_cpu_compat { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + /*uint64_t msr_tsc_aux; COMPAT */ + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +static inline int _hvm_hw_fix_cpu(void *h) { + + union hvm_hw_cpu_union { + struct hvm_hw_cpu nat; + struct hvm_hw_cpu_compat cmp; + } *ucpu = (union hvm_hw_cpu_union *)h; + + /* If we copy from the end backwards, we should + * be able to do the modification in-place */ + ucpu->nat.error_code = ucpu->cmp.error_code; + ucpu->nat.pending_event = ucpu->cmp.pending_event; + ucpu->nat.tsc = ucpu->cmp.tsc; + ucpu->nat.msr_tsc_aux = 0; + + return 0; +} + +DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ + struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); + +/* + * PIC + */ + +struct hvm_hw_vpic { + /* IR line bitmasks. */ + uint8_t irr; + uint8_t imr; + uint8_t isr; + + /* Line IRx maps to IRQ irq_base+x */ + uint8_t irq_base; + + /* + * Where are we in ICW2-4 initialisation (0 means no init in progress)? + * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). 
+ * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) + * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) + */ + uint8_t init_state:4; + + /* IR line with highest priority. */ + uint8_t priority_add:4; + + /* Reads from A=0 obtain ISR or IRR? */ + uint8_t readsel_isr:1; + + /* Reads perform a polling read? */ + uint8_t poll:1; + + /* Automatically clear IRQs from the ISR during INTA? */ + uint8_t auto_eoi:1; + + /* Automatically rotate IRQ priorities during AEOI? */ + uint8_t rotate_on_auto_eoi:1; + + /* Exclude slave inputs when considering in-service IRQs? */ + uint8_t special_fully_nested_mode:1; + + /* Special mask mode excludes masked IRs from AEOI and priority checks. */ + uint8_t special_mask_mode:1; + + /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ + uint8_t is_master:1; + + /* Edge/trigger selection. */ + uint8_t elcr; + + /* Virtual INT output. */ + uint8_t int_output; +}; + +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); + + +/* + * IO-APIC + */ + +#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ + +struct hvm_hw_vioapic { + uint64_t base_address; + uint32_t ioregsel; + uint32_t id; + union vioapic_redir_entry + { + uint64_t bits; + struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; + uint8_t reserved[4]; + uint8_t dest_id; + } fields; + } redirtbl[VIOAPIC_NUM_PINS]; +}; + +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); + + +/* + * LAPIC + */ + +struct hvm_hw_lapic { + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; + uint64_t tdt_msr; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); + +struct hvm_hw_lapic_regs { + uint8_t data[1024]; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); + + +/* + * IRQs + */ + +struct hvm_hw_pci_irqs { + /* + * Virtual interrupt wires for a single PCI bus. + * Indexed by: device*4 + INTx#. + */ + union { + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ + uint64_t pad[2]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); + +struct hvm_hw_isa_irqs { + /* + * Virtual interrupt wires for ISA devices. + * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). + */ + union { + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ + uint64_t pad[1]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); + +struct hvm_hw_pci_link { + /* + * PCI-ISA interrupt router. + * Each PCI is 'wire-ORed' into one of four links using + * the traditional 'barber's pole' mapping ((device + INTx#) & 3). + * The router provides a programmable mapping from each link to a GSI. 
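
The 'barber's pole' rule above is compact enough to misread, so here is a tiny sketch; pci_intx_gsi is a hypothetical helper, and route[] is the array declared in the struct completed just below:

static uint8_t pci_intx_gsi(const struct hvm_hw_pci_link *link,
                            unsigned int device, unsigned int intx /* 0..3 */)
{
    unsigned int l = (device + intx) & 3;  /* e.g. device 3, INTB# (1) -> link 0 */
    return link->route[l];                 /* GSI programmed for that link */
}
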
+ */ + uint8_t route[4]; + uint8_t pad0[4]; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); + +/* + * PIT + */ + +struct hvm_hw_pit { + struct hvm_hw_pit_channel { + uint32_t count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + } channels[3]; /* 3 x 16 bytes */ + uint32_t speaker_data_on; + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); + + +/* + * RTC + */ + +#define RTC_CMOS_SIZE 14 +struct hvm_hw_rtc { + /* CMOS bytes */ + uint8_t cmos_data[RTC_CMOS_SIZE]; + /* Index register for 2-part operations */ + uint8_t cmos_index; + uint8_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); + + +/* + * HPET + */ + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct hvm_hw_hpet { + /* Memory-mapped, software visible registers */ + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t fsb; /* FSB route, not supported now */ + uint64_t res4; /* reserved */ + } timers[HPET_TIMER_NUM]; + uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ + + /* Hidden register state */ + uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ +}; + +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); + + +/* + * PM timer + */ + +struct hvm_hw_pmtimer { + uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ + uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ + uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ +}; + +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); + +/* + * MTRR MSRs + */ + +struct hvm_hw_mtrr { +#define MTRR_VCNT 8 +#define NUM_FIXED_MSR 11 + uint64_t msr_pat_cr; + /* mtrr physbase & physmask msr pair*/ + uint64_t msr_mtrr_var[MTRR_VCNT*2]; + uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; + uint64_t msr_mtrr_cap; + uint64_t msr_mtrr_def_type; +}; + +DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); + +/* + * The save area of XSAVE/XRSTOR. + */ + +struct hvm_hw_cpu_xsave { + uint64_t xfeature_mask; /* Ignored */ + uint64_t xcr0; /* Updated by XSETBV */ + uint64_t xcr0_accum; /* Updated by XSETBV */ + struct { + struct { char x[512]; } fpu_sse; + + struct { + uint64_t xstate_bv; /* Updated by XRSTOR */ + uint64_t reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[0]; } ymm; /* YMM */ + } save_area; +}; + +#define CPU_XSAVE_CODE 16 + +/* + * Viridian hypervisor context. 
+ */
+
+struct hvm_viridian_domain_context {
+    uint64_t hypercall_gpa;
+    uint64_t guest_os_id;
+    uint64_t time_ref_count;
+    uint64_t reference_tsc;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
+
+struct hvm_viridian_vcpu_context {
+    uint64_t apic_assist;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context);
+
+struct hvm_vmce_vcpu {
+    uint64_t caps;
+    uint64_t mci_ctl2_bank0;
+    uint64_t mci_ctl2_bank1;
+};
+
+DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);
+
+struct hvm_tsc_adjust {
+    uint64_t tsc_adjust;
+};
+
+DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+
+
+struct hvm_msr {
+    uint32_t count;
+    struct hvm_one_msr {
+        uint32_t index;
+        uint32_t _rsvd;
+        uint64_t val;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    } msr[];
+#elif defined(__GNUC__)
+    } msr[0];
+#else
+    } msr[1 /* variable size */];
+#endif
+};
+
+#define CPU_MSR_CODE 20
+
+/*
+ * Largest type-code in use
+ */
+#define HVM_SAVE_CODE_MAX 20
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen.h
--- xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,303 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ *
+ * Guest OS interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef type * __guest_handle_ ## name
+#endif
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type); \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+#define XEN_HAVE_PV_GUEST_ENTRY 1
+
+#define XEN_HAVE_PV_UPCALL_MASK 1
+
+/*
+ * `incontents 200 segdesc Segment Descriptor Tables
+ */
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
+ * `
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
+ */
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
+
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
+ * `
+ * ` @pa   The machine physical address of the descriptor to
+ * `       update. Must be either a descriptor page or writable.
+ * ` @desc The descriptor value to update, in the same format as a
+ * `       native descriptor table entry.
+ */
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+#define XEN_LEGACY_MAX_VCPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
+ * `
+ * Sets the stack segment and pointer for the current vcpu.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
+ * `
+ */
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
+#define TI_GET_IF(_ti) ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t vector;  /* exception vector */
+    uint8_t flags;   /* 0-3: privilege level; 4: clear event enable?
*/ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +typedef struct trap_info trap_info_t; +DEFINE_XEN_GUEST_HANDLE(trap_info_t); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + * + * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise + * for HVM and PVH guests, not all information in this structure is updated: + * + * - For HVM guests, the structures read include: fpu_ctxt (if + * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] + * + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other fields not used should be set to 0. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + }; +#else + unsigned long syscall_callback_eip; +#endif +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it has + * been set to invalid e.g. due to the p2m being too large for the 3-level + * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr + * is to be used. 
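
The sentinel convention just described (0 = p2m not yet set up, ~0 = the 3-level tree is invalid and the linear list must be used; the p2m_vaddr and p2m_generation fields are spelled out a few lines further down) fits in a small sketch; p2m_mode is a hypothetical helper:

/* Sketch: decide how to read a guest's p2m from arch_shared_info. */
static int p2m_mode(const struct arch_shared_info *a)
{
    if (a->pfn_to_mfn_frame_list_list == 0)
        return -1;  /* p2m not set up yet */
    if (a->pfn_to_mfn_frame_list_list == ~0UL)
        return 1;   /* 3-level tree invalid: use the linear list at p2m_vaddr */
    return 0;       /* walk the 3-level frame-list tree */
}
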
+ */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + /* + * Following three fields are valid if p2m_cr3 contains a value different + * from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register state + * and holds the folded machine frame number (via xen_pfn_to_cr3) of a + * L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All entries + * in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 and + * a value with the least significant bit set indicates that a mapping + * update is in progress. This allows guest external software (e.g. in Dom0) + * to verify that read mappings are consistent and whether they have changed + * since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an atomic + * write only. + */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); + * + * ` unsigned long + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((unsigned long)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-mca.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-mca.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-mca.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-mca.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,440 @@ +/****************************************************************************** + * arch-x86/mca.h + * + * Contributed by Advanced Micro Devices, Inc. + * Author: Christoph Egger + * + * Guest OS machine check interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following use cases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetch machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen frees and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; our interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT 0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT 0x0002
+/* IN: Dom0 acknowledges previously-fetched telemetry */
+#define XEN_MC_ACK 0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK 0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED 0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA 0x2
+/* OUT: Between notification time and this hypercall another
+ * (most likely) correctable error happened. The fetched data
+ * does not match the original machine check data. */
+#define XEN_MC_NOMATCH 0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA + * Dom0 and DomU: Uncorrectable errors are reported via NMI handlers + */ +#define MC_TYPE_GLOBAL 0 +#define MC_TYPE_BANK 1 +#define MC_TYPE_EXTENDED 2 +#define MC_TYPE_RECOVERY 3 + +struct mcinfo_common { + uint16_t type; /* structure type */ + uint16_t size; /* size of this struct in bytes */ +}; + + +#define MC_FLAG_CORRECTABLE (1 << 0) +#define MC_FLAG_UNCORRECTABLE (1 << 1) +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) +#define MC_FLAG_MCE (1 << 6) +/* contains global x86 mc information */ +struct mcinfo_global { + struct mcinfo_common common; + + /* running domain at the time in error (most likely the impacted one) */ + uint16_t mc_domid; + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ + uint32_t mc_socketid; /* physical socket of the physical core */ + uint16_t mc_coreid; /* physical impacted core */ + uint16_t mc_core_threadid; /* core thread of physical core */ + uint32_t mc_apicid; + uint32_t mc_flags; + uint64_t mc_gstatus; /* global status */ +}; + +/* contains bank local x86 mc information */ +struct mcinfo_bank { + struct mcinfo_common common; + + uint16_t mc_bank; /* bank nr */ + uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0, + * if mc_addr is valid. Never valid on DomU. */ + uint64_t mc_status; /* bank status */ + uint64_t mc_addr; /* bank address, only valid + * if addr bit is set in mc_status */ + uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; +}; + + +struct mcinfo_msr { + uint64_t reg; /* MSR */ + uint64_t value; /* MSR value */ +}; + +/* contains mc information from other + * or additional mc MSRs */ +struct mcinfo_extended { + struct mcinfo_common common; + + /* You can fill up to five registers. + * If you need more, then use this structure + * multiple times. */ + + uint32_t mc_msrs; /* Number of MSRs with valid values. */ + /* + * Currently Intel extended MSR (32/64) include all gp registers + * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be + * useful at present. So expand this array to 16/32 to leave room. + */ + struct mcinfo_msr mc_msr[sizeof(void *) * 4]; +}; + +/* Recovery Action flags. Giving recovery result information to DOM0 */ + +/* Xen takes a successful recovery action; the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some cases */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* Different Recovery Action types. If the action is performed successfully, + * the REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* The interface below is used between XEN/DOM0 for passing XEN's recovery + * action information to DOM0. + * Usage scenario: After offlining a broken page, XEN might pass its page + * offline recovery action result to DOM0. DOM0 will save the information in + * non-volatile memory for further proactive actions, such as offlining the + * easily-broken page earlier when doing the next reboot.
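For illustration (not from the Xen sources): a hedged sketch of how Dom0 might interpret the REC_ACTION_* / MC_ACTION_* flag pairs defined above; they are carried in struct mcinfo_recovery, which follows below:

    #include <stdint.h>
    #include <stdio.h>

    /* Summarise a recovery record's two flag bytes. */
    void log_recovery_action(uint8_t action_flags, uint8_t action_types)
    {
        if (action_flags & REC_ACTION_RECOVERED) {
            if (action_types & MC_ACTION_PAGE_OFFLINE)
                printf("Xen offlined the affected page\n");
            if (action_types & MC_ACTION_CPU_OFFLINE)
                printf("Xen offlined the affected CPU\n");
            if (action_types & MC_ACTION_CACHE_SHRINK)
                printf("Xen disabled part of the L3 cache\n");
        } else if (action_flags & REC_ACTION_NEED_RESET) {
            printf("not recovered: Dom0 may need to arrange a reset\n");
        } else if (action_flags & REC_ACTION_NONE) {
            printf("Xen took no action\n");
        }
    }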
+*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery +{ + struct mcinfo_common common; + uint16_t mc_bank; /* bank nr */ + uint8_t action_flags; + uint8_t action_types; + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; + } action_info; +}; + + +#define MCINFO_HYPERCALLSIZE 1024 +#define MCINFO_MAXSIZE 768 + +#define MCINFO_FLAGS_UNCOMPLETE 0x1 +struct mc_info { + /* Number of mcinfo_* entries in mi_data */ + uint32_t mi_nentries; + uint32_t flags; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; +}; +typedef struct mc_info mc_info_t; +DEFINE_XEN_GUEST_HANDLE(mc_info_t); + +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ + +struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); + + +/* + * OS's should use these instead of writing their own lookup function + * each with its own bugs and drawbacks. + * We use macros instead of static inline functions to allow guests + * to include this header in assembly files (*.S). + */ +/* Prototype: + * uint32_t x86_mcinfo_nentries(struct mc_info *mi); + */ +#define x86_mcinfo_nentries(_mi) \ + (_mi)->mi_nentries +/* Prototype: + * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); + */ +#define x86_mcinfo_first(_mi) \ + ((struct mcinfo_common *)(_mi)->mi_data) +/* Prototype: + * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); + */ +#define x86_mcinfo_next(_mic) \ + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) + +/* Prototype: + * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); + */ +#define x86_mcinfo_lookup(_ret, _mi, _type) \ + do { \ + uint32_t found, i; \ + struct mcinfo_common *_mic; \ + \ + found = 0; \ + (_ret) = NULL; \ + if (_mi == NULL) break; \ + _mic = x86_mcinfo_first(_mi); \ + for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ + if (_mic->type == (_type)) { \ + found = 1; \ + break; \ + } \ + _mic = x86_mcinfo_next(_mic); \ + } \ + (_ret) = found ? 
_mic : NULL; \ + } while (0) + + +/* Usecase 1 + * Register machine check trap callback handler + * (already done via "set_trap_table" hypercall) + */ + +/* Usecase 2 + * Dom0 registers machine check event callback handler + * done by EVTCHNOP_bind_virq + */ + +/* Usecase 3 + * Fetch machine check data from hypervisor. + * Note, this hypercall is special, because both Dom0 and DomU must use this. + */ +#define XEN_MC_fetch 1 +struct xen_mc_fetch { + /* IN/OUT variables. */ + uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, + XEN_MC_ACK if ack'ing an earlier fetch */ + /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, + XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; + uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ + + /* OUT variables. */ + XEN_GUEST_HANDLE(mc_info_t) data; +}; +typedef struct xen_mc_fetch xen_mc_fetch_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); + + +/* Usecase 4 + * This tells the hypervisor to notify a DomU about the machine check error + */ +#define XEN_MC_notifydomain 2 +struct xen_mc_notifydomain { + /* IN variables. */ + uint16_t mc_domid; /* The unprivileged domain to notify. */ + uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. + * Usually echo'd value from the fetch hypercall. */ + + /* IN/OUT variables. */ + uint32_t flags; + +/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ +/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ +}; +typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); + +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t _pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; + +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. 
count-1 in array are valid */ + uint32_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 + +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#define XEN_MC_inject_v2 6 +#define XEN_MC_INJECT_TYPE_MASK 0x7 +#define XEN_MC_INJECT_TYPE_MCE 0x0 +#define XEN_MC_INJECT_TYPE_CMCI 0x1 + +#define XEN_MC_INJECT_CPU_BROADCAST 0x8 + +struct xen_mc_inject_v2 { + uint32_t flags; + struct xenctl_bitmap cpumap; +}; +#endif + +struct xen_mc { + uint32_t cmd; + uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ + union { + struct xen_mc_fetch mc_fetch; + struct xen_mc_notifydomain mc_notifydomain; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; +#if defined(__XEN__) || defined(__XEN_TOOLS__) + struct xen_mc_inject_v2 mc_inject_v2; +#endif + } u; +}; +typedef struct xen_mc xen_mc_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_t); + +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-x86_32.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-x86_32.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-x86_32.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,171 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
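For illustration (not from the Xen sources): putting the pieces of the MCA header together, a sketch of a Dom0-side urgent-telemetry fetch. do_mca_hypercall() stands in for the platform's hypercall glue and is purely hypothetical; set_xen_guest_handle() comes from xen.h:

    #include <string.h>

    extern long do_mca_hypercall(struct xen_mc *mc); /* hypothetical glue */

    long fetch_urgent_telemetry(mc_info_t *buf)
    {
        struct xen_mc mc;

        memset(&mc, 0, sizeof(mc));
        mc.cmd = XEN_MC_fetch;
        mc.interface_version = XEN_MCA_INTERFACE_VERSION;
        mc.u.mc_fetch.flags = XEN_MC_URGENT;
        set_xen_guest_handle(mc.u.mc_fetch.data, buf); /* output buffer */

        return do_mca_hypercall(&mc); /* check OUT bits in u.mc_fetch.flags */
    }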
+ */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +/* Non-PAE bounds are obsolete. */ +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef ___DEFINE_XEN_GUEST_HANDLE +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. 
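For illustration (not from the Xen sources): a minimal use of the machine-to-physical table exposed above; entries are 4 bytes on x86-32, hence the >>2 in MACH2PHYS_NR_ENTRIES:

    /* Translate a machine frame number to the guest's pseudo-physical
     * frame, with a bounds check against the table size. */
    static inline unsigned long mfn_to_pfn(unsigned long mfn)
    {
        if (mfn >= MACH2PHYS_NR_ENTRIES)
            return ~0UL; /* not covered by the M2P table */
        return machine_to_phys_mapping[mfn];
    }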
+ */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-x86_64.h xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-x86_64.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86/xen-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86/xen-x86_64.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,202 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However, RING0 indicates that the guest kernel should return to itself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax).
*/ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef unsigned long xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86_32.h xen-4.6.5/extras/mini-os/include/xen/arch-x86_32.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86_32.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86_32.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" diff -Nru xen-4.6.0/extras/mini-os/include/xen/arch-x86_64.h xen-4.6.5/extras/mini-os/include/xen/arch-x86_64.h --- xen-4.6.0/extras/mini-os/include/xen/arch-x86_64.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/arch-x86_64.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,43 @@ +/****************************************************************************** + * arch-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. 
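For illustration (not from the Xen sources): a tiny demonstration of the __DECL_REG anonymous union from xen-x86_64.h above, under GNU C; eax aliases the full 64-bit slot while _eax exposes the low 32 bits on little-endian x86:

    #include <assert.h>

    void decl_reg_demo(void)
    {
        struct cpu_user_regs regs = { 0 };

        regs.rax = 0x1122334455667788UL;
        assert(regs.eax == regs.rax);     /* same 64-bit storage */
        assert(regs._eax == 0x55667788U); /* low half, little-endian */
    }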
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_callbacks(unsigned long event_selector, + * ` unsigned long event_address, + * ` unsigned long failsafe_selector, + * ` unsigned long failsafe_address); + * ` + * Register for callbacks on events. When an event (from an event + * channel) occurs, event_address is used as the value of eip. + * + * A similar callback occurs if the segment selectors are invalid. + * failsafe_address is used as the value of eip. + * + * On x86_64, event_selector and failsafe_selector are ignored (???). + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/callback.h xen-4.6.5/extras/mini-os/include/xen/callback.h --- xen-4.6.0/extras/mini-os/include/xen/callback.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/callback.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,121 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). 
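For illustration (not from the Xen sources): a hedged sketch of the callback registration described in arch-x86_64.h above; the selector arguments are passed as zero since they are ignored on x86_64. The entry stubs and the hypercall wrapper are assumptions, not part of this header:

    extern void xen_event_entry(void);    /* assumed assembly stubs */
    extern void xen_failsafe_entry(void);
    extern long hypercall_set_callbacks(unsigned long event_selector,
                                        unsigned long event_address,
                                        unsigned long failsafe_selector,
                                        unsigned long failsafe_address);

    static void register_xen_callbacks(void)
    {
        hypercall_set_callbacks(0, (unsigned long)xen_event_entry,
                                0, (unsigned long)xen_failsafe_entry);
    }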
+ * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* x86: Callback for event delivery. */ +#define CALLBACKTYPE_event 0 + +/* x86: Failsafe callback when guest state cannot be restored by Xen. */ +#define CALLBACKTYPE_failsafe 1 + +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ +#define CALLBACKTYPE_syscall 2 + +/* + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel + * feature is enabled. Do not use this callback type in new code. + */ +#define CALLBACKTYPE_sysenter_deprecated 3 + +/* x86: Callback for NMI delivery. */ +#define CALLBACKTYPE_nmi 4 + +/* + * x86: sysenter is only available as follows: + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + * [nb. also 64-bit guest applications on Intel CPUs + * ('64-on-64-on-64'), but syscall is preferred] + */ +#define CALLBACKTYPE_sysenter 5 + +/* + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + */ +#define CALLBACKTYPE_syscall32 7 + +/* + * Disable event delivery during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* + * Register a callback. + */ +#define CALLBACKOP_register 0 +struct callback_register { + uint16_t type; + uint16_t flags; + xen_callback_t address; +}; +typedef struct callback_register callback_register_t; +DEFINE_XEN_GUEST_HANDLE(callback_register_t); + +/* + * Unregister a callback. + * + * Not all callbacks can be unregistered. -EINVAL will be returned if + * you attempt to unregister such a callback. + */ +#define CALLBACKOP_unregister 1 +struct callback_unregister { + uint16_t type; + uint16_t _unused; +}; +typedef struct callback_unregister callback_unregister_t; +DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif + +#endif /* __XEN_PUBLIC_CALLBACK_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/COPYING xen-4.6.5/extras/mini-os/include/xen/COPYING --- xen-4.6.0/extras/mini-os/include/xen/COPYING 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/COPYING 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,38 @@ +XEN NOTICE +========== + +This copyright applies to all files within this subdirectory and its +subdirectories: + include/public/*.h + include/public/hvm/*.h + include/public/io/*.h + +The intention is that these files can be freely copied into the source +tree of an operating system when porting that OS to run on Xen. Doing +so does *not* cause the OS to become subject to the terms of the GPL. + +All other files in the Xen source distribution are covered by version +2 of the GNU General Public License except where explicitly stated +otherwise within individual source files.
+ + -- Keir Fraser (on behalf of the Xen team) + +===================================================================== + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff -Nru xen-4.6.0/extras/mini-os/include/xen/dom0_ops.h xen-4.6.5/extras/mini-os/include/xen/dom0_ops.h --- xen-4.6.0/extras/mini-os/include/xen/dom0_ops.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/dom0_ops.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,120 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOM0_OPS_H__ +#define __XEN_PUBLIC_DOM0_OPS_H__ + +#include "xen.h" +#include "platform.h" + +#if __XEN_INTERFACE_VERSION__ >= 0x00030204 +#error "dom0_ops.h is a compatibility interface only" +#endif + +#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION + +#define DOM0_SETTIME XENPF_settime +#define dom0_settime xenpf_settime +#define dom0_settime_t xenpf_settime_t + +#define DOM0_ADD_MEMTYPE XENPF_add_memtype +#define dom0_add_memtype xenpf_add_memtype +#define dom0_add_memtype_t xenpf_add_memtype_t + +#define DOM0_DEL_MEMTYPE XENPF_del_memtype +#define dom0_del_memtype xenpf_del_memtype +#define dom0_del_memtype_t xenpf_del_memtype_t + +#define DOM0_READ_MEMTYPE XENPF_read_memtype +#define dom0_read_memtype xenpf_read_memtype +#define dom0_read_memtype_t xenpf_read_memtype_t + +#define DOM0_MICROCODE XENPF_microcode_update +#define dom0_microcode xenpf_microcode_update +#define dom0_microcode_t xenpf_microcode_update_t + +#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk +#define dom0_platform_quirk xenpf_platform_quirk +#define dom0_platform_quirk_t xenpf_platform_quirk_t + +typedef uint64_t cpumap_t; + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_MSR 15 +struct dom0_msr { + /* IN variables. */ + uint32_t write; + cpumap_t cpu_mask; + uint32_t msr; + uint32_t in1; + uint32_t in2; + /* OUT variables. */ + uint32_t out1; + uint32_t out2; +}; +typedef struct dom0_msr dom0_msr_t; +DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_PHYSICAL_MEMORY_MAP 40 +struct dom0_memory_map_entry { + uint64_t start, end; + uint32_t flags; /* reserved */ + uint8_t is_ram; +}; +typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; +DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); + +struct dom0_op { + uint32_t cmd; + uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ + union { + struct dom0_msr msr; + struct dom0_settime settime; + struct dom0_add_memtype add_memtype; + struct dom0_del_memtype del_memtype; + struct dom0_read_memtype read_memtype; + struct dom0_microcode microcode; + struct dom0_platform_quirk platform_quirk; + struct dom0_memory_map_entry physical_memory_map; + uint8_t pad[128]; + } u; +}; +typedef struct dom0_op dom0_op_t; +DEFINE_XEN_GUEST_HANDLE(dom0_op_t); + +#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/domctl.h xen-4.6.5/extras/mini-os/include/xen/domctl.h --- xen-4.6.0/extras/mini-os/include/xen/domctl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/domctl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,1154 @@ +/****************************************************************************** + * domctl.h + * + * Domain management operations. For use by node control stack. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOMCTL_H__ +#define __XEN_PUBLIC_DOMCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "domctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "grant_table.h" +#include "hvm/save.h" +#include "memory.h" + +#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a + +/* + * NB. xen_domctl.domain is an IN/OUT parameter for this operation. + * If it is specified as zero, an id is auto-allocated and returned. + */ +/* XEN_DOMCTL_createdomain */ +struct xen_domctl_createdomain { + /* IN parameters */ + uint32_t ssidref; + xen_domain_handle_t handle; + /* Is this an HVM guest (as opposed to a PVH or PV guest)? */ +#define _XEN_DOMCTL_CDF_hvm_guest 0 +#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) + /* Use hardware-assisted paging if available? */ +#define _XEN_DOMCTL_CDF_hap 1 +#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) + /* Should domain memory integrity be verified by tboot during Sx? */ +#define _XEN_DOMCTL_CDF_s3_integrity 2 +#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity) + /* Disable out-of-sync shadow page tables? */ +#define _XEN_DOMCTL_CDF_oos_off 3 +#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) + /* Is this a PVH guest (as opposed to an HVM or PV guest)? */ +#define _XEN_DOMCTL_CDF_pvh_guest 4 +#define XEN_DOMCTL_CDF_pvh_guest (1U<<_XEN_DOMCTL_CDF_pvh_guest) + uint32_t flags; +}; +typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); + +#if defined(__arm__) || defined(__aarch64__) +#define XEN_DOMCTL_CONFIG_GIC_DEFAULT 0 +#define XEN_DOMCTL_CONFIG_GIC_V2 1 +#define XEN_DOMCTL_CONFIG_GIC_V3 2 +/* XEN_DOMCTL_configure_domain */ +struct xen_domctl_arm_configuredomain { + /* IN/OUT parameters */ + uint8_t gic_version; +}; +typedef struct xen_domctl_arm_configuredomain xen_domctl_arm_configuredomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_arm_configuredomain_t); +#endif + +/* XEN_DOMCTL_getdomaininfo */ +struct xen_domctl_getdomaininfo { + /* OUT variables. */ + domid_t domain; /* Also echoed in domctl.domain */ + /* Domain is scheduled to die. */ +#define _XEN_DOMINF_dying 0 +#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying) + /* Domain is an HVM guest (as opposed to a PV guest).
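For illustration (not from the Xen sources): a sketch of how a toolstack might fill in the createdomain op above. struct xen_domctl itself (with its cmd/interface_version/domain/u members) is defined at the end of this header, and do_domctl() is a hypothetical hypercall wrapper:

    #include <string.h>

    extern long do_domctl(struct xen_domctl *domctl); /* hypothetical */

    long create_hvm_domain_with_hap(void)
    {
        struct xen_domctl domctl;

        memset(&domctl, 0, sizeof(domctl));
        domctl.cmd = XEN_DOMCTL_createdomain;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = 0; /* 0 => Xen auto-allocates a domid (see NB above) */
        domctl.u.createdomain.flags =
            XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap;

        return do_domctl(&domctl); /* on success, domctl.domain holds the id */
    }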
*/ +#define _XEN_DOMINF_hvm_guest 1 +#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) + /* The guest OS has shut down. */ +#define _XEN_DOMINF_shutdown 2 +#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) + /* Currently paused by control software. */ +#define _XEN_DOMINF_paused 3 +#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) + /* Currently blocked pending an event. */ +#define _XEN_DOMINF_blocked 4 +#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) + /* Domain is currently running. */ +#define _XEN_DOMINF_running 5 +#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) + /* Being debugged. */ +#define _XEN_DOMINF_debugged 6 +#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) +/* domain is PVH */ +#define _XEN_DOMINF_pvh_guest 7 +#define XEN_DOMINF_pvh_guest (1U<<_XEN_DOMINF_pvh_guest) + /* XEN_DOMINF_shutdown guest-supplied code. */ +#define XEN_DOMINF_shutdownmask 255 +#define XEN_DOMINF_shutdownshift 16 + uint32_t flags; /* XEN_DOMINF_* */ + uint64_aligned_t tot_pages; + uint64_aligned_t max_pages; + uint64_aligned_t outstanding_pages; + uint64_aligned_t shr_pages; + uint64_aligned_t paged_pages; + uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ + uint64_aligned_t cpu_time; + uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ +#define XEN_INVALID_MAX_VCPU_ID (~0U) /* Domain has no vcpus? */ + uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ + uint32_t ssidref; + xen_domain_handle_t handle; + uint32_t cpupool; +}; +typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); + + +/* XEN_DOMCTL_getmemlist */ +struct xen_domctl_getmemlist { + /* IN variables. */ + /* Max entries to write to output buffer. */ + uint64_aligned_t max_pfns; + /* Start index in guest's page list. */ + uint64_aligned_t start_pfn; + XEN_GUEST_HANDLE_64(uint64) buffer; + /* OUT variables. */ + uint64_aligned_t num_pfns; +}; +typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); + + +/* XEN_DOMCTL_getpageframeinfo */ + +#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 +#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) +#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) +#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) +#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) +#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) +#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) +#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) +#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ +#define XEN_DOMCTL_PFINFO_BROKEN (0xdU<<28) /* broken page */ +#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) + +struct xen_domctl_getpageframeinfo { + /* IN variables. */ + uint64_aligned_t gmfn; /* GMFN to query */ + /* OUT variables. */ + /* Is the page PINNED to a type? */ + uint32_t type; /* see above type defs */ +}; +typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); + + +/* XEN_DOMCTL_getpageframeinfo2 */ +struct xen_domctl_getpageframeinfo2 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(uint32) array; +}; +typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); + +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. 
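For illustration (not from the Xen sources): the shutdown-code encoding above packs the guest-supplied reason into bits 16-23 of the flags word; a one-line accessor makes the mask/shift pair concrete:

    #include <stdint.h>

    /* Only meaningful when XEN_DOMINF_shutdown is set in flags. */
    static inline unsigned int dominf_shutdown_code(uint32_t flags)
    {
        return (flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask;
    }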
*/ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + + +/* + * Control shadow pagetables operation + */ +/* XEN_DOMCTL_shadow_op */ + +/* Disable shadow mode. */ +#define XEN_DOMCTL_SHADOW_OP_OFF 0 + +/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE 32 + +/* Log-dirty bitmap operations. */ + /* Return the bitmap and clean internal copy for next round. */ +#define XEN_DOMCTL_SHADOW_OP_CLEAN 11 + /* Return the bitmap but do not modify internal copy. */ +#define XEN_DOMCTL_SHADOW_OP_PEEK 12 + +/* Memory allocation accessors. */ +#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 +#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 + +/* Legacy enable operations. */ + /* Equiv. to ENABLE with no mode flags. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 + /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 + /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 + +/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ + /* + * Shadow pagetables are refcounted: guest does not use explicit mmu + * operations nor write-protect its pagetables. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) + /* + * Log pages in a bitmap as they are dirtied. + * Used for live relocation to determine which pages must be re-sent. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) + /* + * Automatically translate GPFNs into MFNs. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) + /* + * Xen does not steal virtual address space from the guest. + * Requires HVM support. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) + +struct xen_domctl_shadow_op_stats { + uint32_t fault_count; + uint32_t dirty_count; +}; +typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); + +struct xen_domctl_shadow_op { + /* IN variables. */ + uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ + + /* OP_ENABLE */ + uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + + /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ + uint32_t mb; /* Shadow memory allocation in MB */ + + /* OP_PEEK / OP_CLEAN */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; + uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ + struct xen_domctl_shadow_op_stats stats; +}; +typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); + + +/* XEN_DOMCTL_max_mem */ +struct xen_domctl_max_mem { + /* IN variables. */ + uint64_aligned_t max_memkb; +}; +typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); + + +/* XEN_DOMCTL_setvcpucontext */ +/* XEN_DOMCTL_getvcpucontext */ +struct xen_domctl_vcpucontext { + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); + + +/* XEN_DOMCTL_getvcpuinfo */ +struct xen_domctl_getvcpuinfo { + /* IN variables. */ + uint32_t vcpu; + /* OUT variables. */ + uint8_t online; /* currently online (not hotplugged)? */ + uint8_t blocked; /* blocked waiting for an event? */ + uint8_t running; /* currently scheduled on its CPU? 
*/ + uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ + uint32_t cpu; /* current mapping */ +}; +typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); + + +/* Get/set the NUMA node(s) with which the guest has affinity. */ +/* XEN_DOMCTL_setnodeaffinity */ +/* XEN_DOMCTL_getnodeaffinity */ +struct xen_domctl_nodeaffinity { + struct xenctl_bitmap nodemap;/* IN */ +}; +typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t); + + +/* Get/set which physical cpus a vcpu can execute on. */ +/* XEN_DOMCTL_setvcpuaffinity */ +/* XEN_DOMCTL_getvcpuaffinity */ +struct xen_domctl_vcpuaffinity { + /* IN variables. */ + uint32_t vcpu; + /* Set/get the hard affinity for vcpu */ +#define _XEN_VCPUAFFINITY_HARD 0 +#define XEN_VCPUAFFINITY_HARD (1U<<_XEN_VCPUAFFINITY_HARD) + /* Set/get the soft affinity for vcpu */ +#define _XEN_VCPUAFFINITY_SOFT 1 +#define XEN_VCPUAFFINITY_SOFT (1U<<_XEN_VCPUAFFINITY_SOFT) + uint32_t flags; + /* + * IN/OUT variables. + * + * Both are IN/OUT for XEN_DOMCTL_setvcpuaffinity, in which case they + * contain the effective hard and/or soft affinity. That is, upon successful + * return, cpumap_soft contains the intersection of the soft affinity, + * hard affinity and the cpupool's online CPUs for the domain (if + * XEN_VCPUAFFINITY_SOFT was set in flags). cpumap_hard contains the + * intersection between hard affinity and the cpupool's online CPUs (if + * XEN_VCPUAFFINITY_HARD was set in flags). + * + * Both are OUT-only for XEN_DOMCTL_getvcpuaffinity, in which case they + * contain the plain hard and/or soft affinity masks that were set during + * previous successful calls to XEN_DOMCTL_setvcpuaffinity (or the + * default values), without intersecting or altering them in any way. + */ + struct xenctl_bitmap cpumap_hard; + struct xenctl_bitmap cpumap_soft; +}; +typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); + + +/* XEN_DOMCTL_max_vcpus */ +struct xen_domctl_max_vcpus { + uint32_t max; /* maximum number of vcpus */ +}; +typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); + + +/* XEN_DOMCTL_scheduler_op */ +/* Scheduler types. */ +#define XEN_SCHEDULER_SEDF 4 +#define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_CREDIT2 6 +#define XEN_SCHEDULER_ARINC653 7 +#define XEN_SCHEDULER_RTDS 8 + +/* Set or get info?
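For illustration (not from the Xen sources): a hedged sketch of a combined hard+soft affinity update using the flags above; building the xenctl_bitmap handles is toolstack plumbing and is elided here:

    #include <string.h>

    void prepare_affinity_op(struct xen_domctl_vcpuaffinity *aff)
    {
        memset(aff, 0, sizeof(*aff));
        aff->vcpu  = 0;
        aff->flags = XEN_VCPUAFFINITY_HARD | XEN_VCPUAFFINITY_SOFT;
        /* aff->cpumap_hard / aff->cpumap_soft must then be pointed at
         * caller-built CPU bitmaps; after XEN_DOMCTL_setvcpuaffinity both
         * come back holding the effective (intersected) masks described
         * in the comment above. */
    }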
*/ +#define XEN_DOMCTL_SCHEDOP_putinfo 0 +#define XEN_DOMCTL_SCHEDOP_getinfo 1 +struct xen_domctl_scheduler_op { + uint32_t sched_id; /* XEN_SCHEDULER_* */ + uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ + union { + struct xen_domctl_sched_sedf { + uint64_aligned_t period; + uint64_aligned_t slice; + uint64_aligned_t latency; + uint32_t extratime; + uint32_t weight; + } sedf; + struct xen_domctl_sched_credit { + uint16_t weight; + uint16_t cap; + } credit; + struct xen_domctl_sched_credit2 { + uint16_t weight; + } credit2; + struct xen_domctl_sched_rtds { + uint32_t period; + uint32_t budget; + } rtds; + } u; +}; +typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); + + +/* XEN_DOMCTL_setdomainhandle */ +struct xen_domctl_setdomainhandle { + xen_domain_handle_t handle; +}; +typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); + + +/* XEN_DOMCTL_setdebugging */ +struct xen_domctl_setdebugging { + uint8_t enable; +}; +typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); + + +/* XEN_DOMCTL_irq_permission */ +struct xen_domctl_irq_permission { + uint8_t pirq; + uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ +}; +typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); + + +/* XEN_DOMCTL_iomem_permission */ +struct xen_domctl_iomem_permission { + uint64_aligned_t first_mfn;/* first page (physical page number) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ +}; +typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); + + +/* XEN_DOMCTL_ioport_permission */ +struct xen_domctl_ioport_permission { + uint32_t first_port; /* first port in range */ + uint32_t nr_ports; /* size of port range */ + uint8_t allow_access; /* allow or deny access to range?
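For illustration (not from the Xen sources): a minimal example of the scheduler op above, capping a domain at half of one physical CPU under the credit scheduler; weight 256 is the credit scheduler's default relative share and cap is in percent:

    #include <string.h>

    void prepare_credit_cap(struct xen_domctl_scheduler_op *op)
    {
        memset(op, 0, sizeof(*op));
        op->sched_id        = XEN_SCHEDULER_CREDIT;
        op->cmd             = XEN_DOMCTL_SCHEDOP_putinfo;
        op->u.credit.weight = 256; /* relative share (default) */
        op->u.credit.cap    = 50;  /* at most 50% of one CPU */
    }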
*/ +}; +typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); + + +/* XEN_DOMCTL_hypercall_init */ +struct xen_domctl_hypercall_init { + uint64_aligned_t gmfn; /* GMFN to be initialised */ +}; +typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); + + +/* XEN_DOMCTL_settimeoffset */ +struct xen_domctl_settimeoffset { + int32_t time_offset_seconds; /* applied to domain wallclock time */ +}; +typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); + +/* XEN_DOMCTL_gethvmcontext */ +/* XEN_DOMCTL_sethvmcontext */ +typedef struct xen_domctl_hvmcontext { + uint32_t size; /* IN/OUT: size of buffer / bytes filled */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size req'd */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); + + +/* XEN_DOMCTL_set_address_size */ +/* XEN_DOMCTL_get_address_size */ +typedef struct xen_domctl_address_size { + uint32_t size; +} xen_domctl_address_size_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); + + +/* XEN_DOMCTL_sendtrigger */ +#define XEN_DOMCTL_SENDTRIGGER_NMI 0 +#define XEN_DOMCTL_SENDTRIGGER_RESET 1 +#define XEN_DOMCTL_SENDTRIGGER_INIT 2 +#define XEN_DOMCTL_SENDTRIGGER_POWER 3 +#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4 +struct xen_domctl_sendtrigger { + uint32_t trigger; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); + + +/* Assign PCI device to HVM guest. Sets up IOMMU structures. */ +/* XEN_DOMCTL_assign_device */ +/* XEN_DOMCTL_test_assign_device */ +/* XEN_DOMCTL_deassign_device */ +struct xen_domctl_assign_device { + uint32_t machine_sbdf; /* machine PCI ID of assigned device */ +}; +typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); + +/* Retrieve sibling device information for machine_sbdf */ +/* XEN_DOMCTL_get_device_group */ +struct xen_domctl_get_device_group { + uint32_t machine_sbdf; /* IN */ + uint32_t max_sdevs; /* IN */ + uint32_t num_sdevs; /* OUT */ + XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ +}; +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); + +/* Pass-through interrupts: bind real irq -> hvm devfn. */ +/* XEN_DOMCTL_bind_pt_irq */ +/* XEN_DOMCTL_unbind_pt_irq */ +typedef enum pt_irq_type_e { + PT_IRQ_TYPE_PCI, + PT_IRQ_TYPE_ISA, + PT_IRQ_TYPE_MSI, + PT_IRQ_TYPE_MSI_TRANSLATE, +} pt_irq_type_t; +struct xen_domctl_bind_pt_irq { + uint32_t machine_irq; + pt_irq_type_t irq_type; + uint32_t hvm_domid; + + union { + struct { + uint8_t isa_irq; + } isa; + struct { + uint8_t bus; + uint8_t device; + uint8_t intx; + } pci; + struct { + uint8_t gvec; + uint32_t gflags; + uint64_aligned_t gtable; + } msi; + } u; +}; +typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); + + +/* Bind machine I/O address range -> HVM address range.
*/ +/* XEN_DOMCTL_memory_mapping */ +#define DPCI_ADD_MAPPING 1 +#define DPCI_REMOVE_MAPPING 0 +struct xen_domctl_memory_mapping { + uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ + uint64_aligned_t first_mfn; /* first page (machine page) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint32_t add_mapping; /* add or remove mapping */ + uint32_t padding; /* padding for 64-bit aligned structure */ +}; +typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); + + +/* Bind machine I/O port range -> HVM I/O port range. */ +/* XEN_DOMCTL_ioport_mapping */ +struct xen_domctl_ioport_mapping { + uint32_t first_gport; /* first guest IO port*/ + uint32_t first_mport; /* first machine IO port */ + uint32_t nr_ports; /* size of port range */ + uint32_t add_mapping; /* add or remove mapping */ +}; +typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); + + +/* + * Pin caching type of RAM space for x86 HVM domU. + */ +/* XEN_DOMCTL_pin_mem_cacheattr */ +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 +#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 +#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 +#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 +#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 +#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +#define XEN_DOMCTL_DELETE_MEM_CACHEATTR (~(uint32_t)0) +struct xen_domctl_pin_mem_cacheattr { + uint64_aligned_t start, end; + uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ +}; +typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); + + +/* XEN_DOMCTL_set_ext_vcpucontext */ +/* XEN_DOMCTL_get_ext_vcpucontext */ +struct xen_domctl_ext_vcpucontext { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: Size of struct (IN) + * GET: Size of struct (OUT, up to 128 bytes) + */ + uint32_t size; +#if defined(__i386__) || defined(__x86_64__) + /* SYSCALL from 32-bit mode and SYSENTER callback information. */ + /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ + uint64_aligned_t syscall32_callback_eip; + uint64_aligned_t sysenter_callback_eip; + uint16_t syscall32_callback_cs; + uint16_t sysenter_callback_cs; + uint8_t syscall32_disables_events; + uint8_t sysenter_disables_events; +#if defined(__GNUC__) + union { + uint64_aligned_t mcg_cap; + struct hvm_vmce_vcpu vmce; + }; +#else + struct hvm_vmce_vcpu vmce; +#endif +#endif +}; +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); + +/* + * Set the target domain for a domain + */ +/* XEN_DOMCTL_set_target */ +struct xen_domctl_set_target { + domid_t target; +}; +typedef struct xen_domctl_set_target xen_domctl_set_target_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); + +#if defined(__i386__) || defined(__x86_64__) +# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF +/* XEN_DOMCTL_set_cpuid */ +struct xen_domctl_cpuid { + uint32_t input[2]; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); +#endif + +/* + * Arranges that if the domain suspends (specifically, if it shuts + * down with code SHUTDOWN_suspend), this event channel will be + * notified. 
+ * + * This is _instead of_ the usual notification to the global + * VIRQ_DOM_EXC. (In most systems that pirq is owned by xenstored.) + * + * Only one subscription per domain is possible. Last subscriber + * wins; others are silently displaced. + * + * NB that contrary to the rather general name, it only applies to + * domain shutdown with code suspend. Shutdown for other reasons + * (including crash), and domain death, are notified to VIRQ_DOM_EXC + * regardless. + */ +/* XEN_DOMCTL_subscribe */ +struct xen_domctl_subscribe { + uint32_t port; /* IN */ +}; +typedef struct xen_domctl_subscribe xen_domctl_subscribe_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t); + +/* + * Define the maximum machine address size which should be allocated + * to a guest. + */ +/* XEN_DOMCTL_set_machine_address_size */ +/* XEN_DOMCTL_get_machine_address_size */ + +/* + * Do not inject spurious page faults into this domain. + */ +/* XEN_DOMCTL_suppress_spurious_page_faults */ + +/* XEN_DOMCTL_debug_op */ +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 +struct xen_domctl_debug_op { + uint32_t op; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); + +/* + * Request a particular record from the HVM context + */ +/* XEN_DOMCTL_gethvmcontext_partial */ +typedef struct xen_domctl_hvmcontext_partial { + uint32_t type; /* IN: Type of record required */ + uint32_t instance; /* IN: Instance of that type */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ +} xen_domctl_hvmcontext_partial_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); + +/* XEN_DOMCTL_disable_migrate */ +typedef struct xen_domctl_disable_migrate { + uint32_t disable; /* IN: 1: disable migration and restore */ +} xen_domctl_disable_migrate_t; + + +/* XEN_DOMCTL_gettscinfo */ +/* XEN_DOMCTL_settscinfo */ +struct xen_guest_tsc_info { + uint32_t tsc_mode; + uint32_t gtsc_khz; + uint32_t incarnation; + uint32_t pad; + uint64_aligned_t elapsed_nsec; +}; +typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); +typedef struct xen_domctl_tsc_info { + XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ + xen_guest_tsc_info_t info; /* IN */ +} xen_domctl_tsc_info_t; + +/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ +struct xen_domctl_gdbsx_memio { + /* IN */ + uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ + uint64_aligned_t gva; /* guest virtual address */ + uint64_aligned_t uva; /* user buffer virtual address */ + uint32_t len; /* number of bytes to read/write */ + uint8_t gwr; /* 0 = read from guest. 1 = write to guest */ + /* OUT */ + uint32_t remain; /* bytes remaining to be copied */ +}; + +/* XEN_DOMCTL_gdbsx_pausevcpu */ +/* XEN_DOMCTL_gdbsx_unpausevcpu */ +struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ + uint32_t vcpu; /* which vcpu */ +}; + +/* XEN_DOMCTL_gdbsx_domstatus */ +struct xen_domctl_gdbsx_domstatus { + /* OUT */ + uint8_t paused; /* is the domain paused */ + uint32_t vcpu_id; /* any vcpu in an event? */ + uint32_t vcpu_ev; /* if yes, what event? */ +}; + +/* + * Memory event operations + */ + +/* XEN_DOMCTL_mem_event_op */ + +/* + * Domain memory paging + * Page memory in and out. + * Domctl interface to set up and tear down the + * pager<->hypervisor interface. Use XENMEM_paging_op* + * to perform per-page operations. 
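For the debug-op interface just defined, a minimal sketch (same assumed issue_domctl() helper as in the earlier sketch) that toggles single-stepping of one vcpu:

    static int set_single_step(domid_t guest, uint32_t vcpu, int on)
    {
        struct xen_domctl ctl;

        memset(&ctl, 0, sizeof(ctl));
        ctl.cmd = XEN_DOMCTL_debug_op;
        ctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        ctl.domain = guest;
        ctl.u.debug_op.op = on ? XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON
                               : XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF;
        ctl.u.debug_op.vcpu = vcpu;
        return issue_domctl(&ctl);
    }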
+ * + * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several + * non-standard error codes to indicate why paging could not be enabled: + * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest + * EMLINK - guest has iommu passthrough enabled + * EXDEV - guest has PoD enabled + * EBUSY - guest has or had paging enabled, ring buffer still active + */ +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 + +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 + +/* + * Access permissions. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * There are HVM hypercalls to set the per-page access permissions of every + * page in a domain. When one of these permissions--independent, read, + * write, and execute--is violated, the VCPU is paused and a memory event + * is sent describing what happened. (See public/mem_event.h.) + * + * The memory event handler can then resume the VCPU and redo the access + * with a XENMEM_access_op_resume hypercall. + * + * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several + * non-standard error codes to indicate why access could not be enabled: + * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest + * EBUSY - guest has or had access enabled, ring buffer still active + */ +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 + +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION 2 + +/* + * Sharing ENOMEM helper. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * If set up, this ring is used to communicate failed allocations + * in the unshare path. XENMEM_sharing_op_resume is used to wake up + * vcpus that could not unshare. + * + * Note that sharing can be turned on (as per the domctl below) + * *without* this ring being set up. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 + +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1 + +/* Use for teardown/setup of helper<->hypervisor interface for paging, + * access and sharing. */ +struct xen_domctl_mem_event_op { + uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ + uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + + uint32_t port; /* OUT: event channel for ring */ +}; +typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); + +/* + * Memory sharing operations + */ +/* XEN_DOMCTL_mem_sharing_op. + * The CONTROL sub-domctl is used for bringup/teardown.
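To show how the op/mode pair and the OUT port are used, a sketch of enabling the access-event ring (again via the assumed issue_domctl() helper; a real listener must additionally have arranged the ring page, e.g. through the HVM parameters, which is omitted here):

    static int enable_access_ring(domid_t guest, uint32_t *port_out)
    {
        struct xen_domctl ctl;

        memset(&ctl, 0, sizeof(ctl));
        ctl.cmd = XEN_DOMCTL_mem_event_op;
        ctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        ctl.domain = guest;
        ctl.u.mem_event_op.mode = XEN_DOMCTL_MEM_EVENT_OP_ACCESS;
        ctl.u.mem_event_op.op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE;
        if (issue_domctl(&ctl))
            return -1;
        *port_out = ctl.u.mem_event_op.port; /* event channel for the ring */
        return 0;
    }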
*/ +#define XEN_DOMCTL_MEM_SHARING_CONTROL 0 + +struct xen_domctl_mem_sharing_op { + uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ + + union { + uint8_t enable; /* CONTROL */ + } u; +}; +typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); + +struct xen_domctl_audit_p2m { + /* OUT error counts */ + uint64_t orphans; + uint64_t m2p_bad; + uint64_t p2m_bad; +}; +typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t); + +struct xen_domctl_set_virq_handler { + uint32_t virq; /* IN */ +}; +typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t); + +#if defined(__i386__) || defined(__x86_64__) +/* XEN_DOMCTL_setvcpuextstate */ +/* XEN_DOMCTL_getvcpuextstate */ +struct xen_domctl_vcpuextstate { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: Ignored. + * GET: xfeature support mask of struct (IN/OUT) + * The xfeature mask serves to identify the saving format, + * so that compatible CPUs can check the format to decide + * whether they can restore it. + */ + uint64_aligned_t xfeature_mask; + /* + * SET: Size of struct (IN) + * GET: Size of struct (IN/OUT) + */ + uint64_aligned_t size; + XEN_GUEST_HANDLE_64(uint64) buffer; +}; +typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); +#endif + +/* XEN_DOMCTL_set_access_required: sets whether a memory event listener + * must be present to handle page access events: if false, the page + * access will revert to full permissions if no one is listening. + */ +struct xen_domctl_set_access_required { + uint8_t access_required; +}; +typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); + +struct xen_domctl_set_broken_page_p2m { + uint64_aligned_t pfn; +}; +typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); + +/* + * XEN_DOMCTL_set_max_evtchn: sets the maximum event channel port + * number the guest may use. Use this to limit the amount of resources + * (global mapping space, xenheap) a guest may use for event channels. + */ +struct xen_domctl_set_max_evtchn { + uint32_t max_port; +}; +typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t); + +/* + * ARM: Clean and invalidate caches associated with given region of + * guest memory. + */ +struct xen_domctl_cacheflush { + /* IN: page range to flush. */ + xen_pfn_t start_pfn, nr_pfns; +}; +typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t); + +#if defined(__i386__) || defined(__x86_64__) +struct xen_domctl_vcpu_msr { + uint32_t index; + uint32_t reserved; + uint64_aligned_t value; +}; +typedef struct xen_domctl_vcpu_msr xen_domctl_vcpu_msr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msr_t); + +/* + * XEN_DOMCTL_set_vcpu_msrs / XEN_DOMCTL_get_vcpu_msrs. + * + * Input: + * - A NULL 'msrs' guest handle is a request for the maximum 'msr_count'. + * - Otherwise, 'msr_count' is the number of entries in 'msrs'. + * + * Output for get: + * - If 'msr_count' is less than the number Xen needs to write, -ENOBUFS shall + * be returned and 'msr_count' updated to reflect the intended number.
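The NULL-handle probe just described leads to a two-call pattern; the get/set contract continues below, and a hedged sketch of the pattern follows (issue_domctl() as before; note that a real toolstack must pass hypercall-safe, locked memory for the buffer, which libxc's bounce-buffer machinery handles and this sketch glosses over):

    #include <stdio.h>
    #include <stdlib.h>

    static int dump_vcpu_msrs(domid_t guest, uint32_t vcpu)
    {
        struct xen_domctl ctl;
        xen_domctl_vcpu_msr_t *buf;
        uint32_t i;

        memset(&ctl, 0, sizeof(ctl));
        ctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
        ctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        ctl.domain = guest;
        ctl.u.vcpu_msrs.vcpu = vcpu;
        set_xen_guest_handle(ctl.u.vcpu_msrs.msrs, NULL); /* probe for count */
        if (issue_domctl(&ctl))
            return -1;

        buf = calloc(ctl.u.vcpu_msrs.msr_count, sizeof(*buf));
        set_xen_guest_handle(ctl.u.vcpu_msrs.msrs, buf);  /* now fetch them */
        if (issue_domctl(&ctl) == 0)
            for (i = 0; i < ctl.u.vcpu_msrs.msr_count; i++)
                printf("MSR %#x = %#llx\n", buf[i].index,
                       (unsigned long long)buf[i].value);
        free(buf);
        return 0;
    }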
+ * - On success, 'msr_count' shall indicate the number of MSRs written, which + * may be less than the maximum if some are not currently used by the vcpu. + * + * Output for set: + * - If Xen encounters an error with a specific MSR, -EINVAL shall be returned + * and 'msr_count' shall be set to the offending index, to aid debugging. + */ +struct xen_domctl_vcpu_msrs { + uint32_t vcpu; /* IN */ + uint32_t msr_count; /* IN/OUT */ + XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t); +#endif + +/* + * Use in XEN_DOMCTL_setvnumainfo to set + * vNUMA domain topology. + */ +struct xen_domctl_vnuma { + uint32_t nr_vnodes; + uint32_t nr_vmemranges; + uint32_t nr_vcpus; + uint32_t pad; + XEN_GUEST_HANDLE_64(uint) vdistance; + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; + + /* + * Mapping of vnodes to physical NUMA nodes. + * This is kept on a per-domain basis for + * interested consumers, such as NUMA-aware ballooning. + */ + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode; + + /* + * memory ranges for each vNUMA node + */ + XEN_GUEST_HANDLE_64(xen_vmemrange_t) vmemrange; +}; +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t); + +struct xen_domctl_psr_cmt_op { +#define XEN_DOMCTL_PSR_CMT_OP_DETACH 0 +#define XEN_DOMCTL_PSR_CMT_OP_ATTACH 1 +#define XEN_DOMCTL_PSR_CMT_OP_QUERY_RMID 2 + uint32_t cmd; + uint32_t data; +}; +typedef struct xen_domctl_psr_cmt_op xen_domctl_psr_cmt_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t); + +struct xen_domctl { + uint32_t cmd; +#define XEN_DOMCTL_createdomain 1 +#define XEN_DOMCTL_destroydomain 2 +#define XEN_DOMCTL_pausedomain 3 +#define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_getdomaininfo 5 +#define XEN_DOMCTL_getmemlist 6 +#define XEN_DOMCTL_getpageframeinfo 7 +#define XEN_DOMCTL_getpageframeinfo2 8 +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_shadow_op 10 +#define XEN_DOMCTL_max_mem 11 +#define XEN_DOMCTL_setvcpucontext 12 +#define XEN_DOMCTL_getvcpucontext 13 +#define XEN_DOMCTL_getvcpuinfo 14 +#define XEN_DOMCTL_max_vcpus 15 +#define XEN_DOMCTL_scheduler_op 16 +#define XEN_DOMCTL_setdomainhandle 17 +#define XEN_DOMCTL_setdebugging 18 +#define XEN_DOMCTL_irq_permission 19 +#define XEN_DOMCTL_iomem_permission 20 +#define XEN_DOMCTL_ioport_permission 21 +#define XEN_DOMCTL_hypercall_init 22 +#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */ +#define XEN_DOMCTL_settimeoffset 24 +#define XEN_DOMCTL_getvcpuaffinity 25 +#define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */ +#define XEN_DOMCTL_resumedomain 27 +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_subscribe 29 +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +#define XEN_DOMCTL_assign_device 37 +#define XEN_DOMCTL_bind_pt_irq 38 +#define XEN_DOMCTL_memory_mapping 39 +#define XEN_DOMCTL_ioport_mapping 40 +#define XEN_DOMCTL_pin_mem_cacheattr 41 +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +#define XEN_DOMCTL_set_opt_feature 44 /* Obsolete IA64 only */ +#define XEN_DOMCTL_test_assign_device 45 +#define XEN_DOMCTL_set_target 46 +#define XEN_DOMCTL_deassign_device 47 +#define XEN_DOMCTL_unbind_pt_irq 48 +#define XEN_DOMCTL_set_cpuid 49 +#define XEN_DOMCTL_get_device_group 50 +#define XEN_DOMCTL_set_machine_address_size 51 +#define XEN_DOMCTL_get_machine_address_size 52
+#define XEN_DOMCTL_suppress_spurious_page_faults 53 +#define XEN_DOMCTL_debug_op 54 +#define XEN_DOMCTL_gethvmcontext_partial 55 +#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_mem_sharing_op 57 +#define XEN_DOMCTL_disable_migrate 58 +#define XEN_DOMCTL_gettscinfo 59 +#define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 +#define XEN_DOMCTL_setvcpuextstate 62 +#define XEN_DOMCTL_getvcpuextstate 63 +#define XEN_DOMCTL_set_access_required 64 +#define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_set_broken_page_p2m 67 +#define XEN_DOMCTL_setnodeaffinity 68 +#define XEN_DOMCTL_getnodeaffinity 69 +#define XEN_DOMCTL_set_max_evtchn 70 +#define XEN_DOMCTL_cacheflush 71 +#define XEN_DOMCTL_get_vcpu_msrs 72 +#define XEN_DOMCTL_set_vcpu_msrs 73 +#define XEN_DOMCTL_setvnumainfo 74 +#define XEN_DOMCTL_psr_cmt_op 75 +#define XEN_DOMCTL_arm_configure_domain 76 +#define XEN_DOMCTL_gdbsx_guestmemio 1000 +#define XEN_DOMCTL_gdbsx_pausevcpu 1001 +#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 +#define XEN_DOMCTL_gdbsx_domstatus 1003 + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct xen_domctl_createdomain createdomain; +#if defined(__arm__) || defined(__aarch64__) + struct xen_domctl_arm_configuredomain configuredomain; +#endif + struct xen_domctl_getdomaininfo getdomaininfo; + struct xen_domctl_getmemlist getmemlist; + struct xen_domctl_getpageframeinfo getpageframeinfo; + struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; + struct xen_domctl_nodeaffinity nodeaffinity; + struct xen_domctl_vcpuaffinity vcpuaffinity; + struct xen_domctl_shadow_op shadow_op; + struct xen_domctl_max_mem max_mem; + struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; + struct xen_domctl_max_vcpus max_vcpus; + struct xen_domctl_scheduler_op scheduler_op; + struct xen_domctl_setdomainhandle setdomainhandle; + struct xen_domctl_setdebugging setdebugging; + struct xen_domctl_irq_permission irq_permission; + struct xen_domctl_iomem_permission iomem_permission; + struct xen_domctl_ioport_permission ioport_permission; + struct xen_domctl_hypercall_init hypercall_init; + struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_disable_migrate disable_migrate; + struct xen_domctl_tsc_info tsc_info; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; + struct xen_domctl_assign_device assign_device; + struct xen_domctl_bind_pt_irq bind_pt_irq; + struct xen_domctl_memory_mapping memory_mapping; + struct xen_domctl_ioport_mapping ioport_mapping; + struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; + struct xen_domctl_ext_vcpucontext ext_vcpucontext; + struct xen_domctl_set_target set_target; + struct xen_domctl_subscribe subscribe; + struct xen_domctl_debug_op debug_op; + struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_mem_sharing_op mem_sharing_op; +#if defined(__i386__) || defined(__x86_64__) + struct xen_domctl_cpuid cpuid; + struct xen_domctl_vcpuextstate vcpuextstate; + struct xen_domctl_vcpu_msrs vcpu_msrs; +#endif + struct xen_domctl_set_access_required access_required; + struct xen_domctl_audit_p2m audit_p2m; + struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_set_max_evtchn 
set_max_evtchn; + struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; + struct xen_domctl_cacheflush cacheflush; + struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; + struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; + struct xen_domctl_vnuma vnuma; + struct xen_domctl_psr_cmt_op psr_cmt_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); + +#endif /* __XEN_PUBLIC_DOMCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/elfnote.h xen-4.6.5/extras/mini-os/include/xen/elfnote.h --- xen-4.6.0/extras/mini-os/include/xen/elfnote.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/elfnote.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,271 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicates the fields in the legacy __xen_guest string which + * this note type replaces. + * + * String values (for non-legacy) are NULL terminated ASCII, also known + * as ASCIZ type. + */ + +/* + * NAME=VALUE pair (string). + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). + * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the actual required + * pseudo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field.
This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[extended-cr3]" in the __xen_guest interface.) + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters. If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b. The legacy interface treated the presence or absence + * of this string as a boolean flag rather than requiring "yes" or + * "no".) + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + * This is a numeric value. + * + * Default is 0. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about.
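These notes are consumed from a PT_NOTE segment, so a guest kernel typically emits them with an assembler fragment. A hedged sketch of what that can look like from C (GNU toolchain assumed; the namesz/descsz/type words, the "Xen" name, and 4-byte alignment follow the standard ELF note layout, and 6 is XEN_ELFNOTE_GUEST_OS from above):

    /* Emits XEN_ELFNOTE_GUEST_OS = "linux"; other string notes are analogous. */
    asm(".pushsection .note.Xen, \"a\", @note\n"
        "  .align 4\n"
        "  .long 2f - 1f\n"          /* namesz */
        "  .long 4f - 3f\n"          /* descsz */
        "  .long 6\n"                /* type: XEN_ELFNOTE_GUEST_OS */
        "1: .asciz \"Xen\"\n"        /* name */
        "2: .align 4\n"
        "3: .asciz \"linux\"\n"      /* desc: the string value */
        "4: .align 4\n"
        "  .popsection\n");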
The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is a xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/event_channel.h xen-4.6.5/extras/mini-os/include/xen/event_channel.h --- xen-4.6.0/extras/mini-os/include/xen/event_channel.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/event_channel.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,385 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include "xen.h" + +/* + * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information; the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). + */ + +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +#define EVTCHNOP_init_control 11 +#define EVTCHNOP_expand_array 12 +#define EVTCHNOP_set_priority 13 +/* ` } */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * + * In case the peer domain has already tried to set our event channel + * pending, before it was bound, EVTCHNOP_bind_interdomain always sets + * the local event channel pending.
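As a usage sketch for EVTCHNOP_alloc_unbound from dom0 userspace: libxc wraps this operation as xc_evtchn_alloc_unbound(), so creating a port in one domain that will accept a binding from another looks roughly like this (error reporting trimmed):

    #include <stdio.h>
    #include <xenctrl.h>

    int alloc_unbound_port(uint32_t dom, uint32_t remote_dom)
    {
        xc_interface *xch = xc_interface_open(NULL, NULL, 0);
        int port;

        if (!xch)
            return -1;
        port = xc_evtchn_alloc_unbound(xch, dom, remote_dom);
        if (port >= 0)
            printf("port %d in dom%u awaits dom%u\n", port, dom, remote_dom);
        xc_interface_close(xch);
        return port;
    }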
+ * + * The usual pattern of use, in the guest's upcall (or subsequent + * handler) is as follows: (Re-enable the event channel for subsequent + * signalling and then) check for the existence of whatever condition + * is being waited for by other means, and take whatever action is + * needed (if any). + * + * NOTES: + * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <virq> on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; /* enum virq */ + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>). + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line.
*/ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> other than DOMID_SELF. + * 3. Destroys all control blocks and event array, resets event channel + * operations to 2-level ABI if called with <dom> == DOMID_SELF and FIFO + * ABI was used. Guests should not bind events during EVTCHNOP_reset call + * as these events are likely to be lost. + */ +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + * + * Note: any events that are currently pending will not be resent and + * will be lost. Guests should call this before binding any event to + * avoid losing any events. + */ +struct evtchn_init_control { + /* IN parameters. */ + uint64_t control_gfn; + uint32_t offset; + uint32_t vcpu; + /* OUT parameters. */ + uint8_t link_bits; + uint8_t _pad[7]; +}; +typedef struct evtchn_init_control evtchn_init_control_t; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +struct evtchn_expand_array { + /* IN parameters. */ + uint64_t array_gfn; +}; +typedef struct evtchn_expand_array evtchn_expand_array_t; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +struct evtchn_set_priority { + /* IN parameters. */ + uint32_t port; + uint32_t priority; +}; +typedef struct evtchn_set_priority evtchn_set_priority_t; + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superseded by new event_channel_op() hypercall since 0x00030202.
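From inside a kernel (mini-os style, whose copy of this header this hunk adds), binding a VIRQ is a direct hypercall. A sketch assuming the usual HYPERVISOR_event_channel_op() stub:

    static evtchn_port_t bind_timer_virq(void)
    {
        struct evtchn_bind_virq op = {
            .virq = VIRQ_TIMER,   /* per-vcpu VIRQ from xen.h; binds on .vcpu */
            .vcpu = 0,
        };

        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op) != 0)
            return 0;             /* 0 used as a failure sentinel here */
        return op.port;           /* OUT parameter */
    }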
+ */ +struct evtchn_op { + uint32_t cmd; /* enum event_channel_op */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). */ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + uint32_t ready; + uint32_t _rsvd; + uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; +typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/features.h xen-4.6.5/extras/mini-os/include/xen/features.h --- xen-4.6.0/extras/mini-os/include/xen/features.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/features.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,114 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES + * + * The list of all the features the guest supports. They are set by + * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES + * string. 
The format is the feature names (as given here without the + * "XENFEAT_" prefix) separated by '|' characters. + * If a feature is required for the kernel to function then the feature name + * must be preceded by a '!' character. + * + * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then + * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0. + */ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space is handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + +/* x86: Does this Xen host support the HVM callback vector type? */ +#define XENFEAT_hvm_callback_vector 8 + +/* x86: pvclock algorithm is safe to use on HVM */ +#define XENFEAT_hvm_safe_pvclock 9 + +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + +/* operation as Dom0 is supported */ +#define XENFEAT_dom0 11 + +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/gcov.h xen-4.6.5/extras/mini-os/include/xen/gcov.h --- xen-4.6.0/extras/mini-os/include/xen/gcov.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/gcov.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,115 @@ +/****************************************************************************** + * gcov.h + * + * Coverage structures exported by Xen. + * Structure is different from Gcc one. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
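A guest checks the feature flags above at boot through XENVER_get_features (public/version.h); a sketch assuming the usual HYPERVISOR_xen_version() stub:

    #include <xen/version.h>

    static int xen_feature(unsigned int nr)
    {
        xen_feature_info_t fi = { .submap_idx = nr / 32 };

        if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
            return 0;
        return (fi.submap >> (nr % 32)) & 1;
    }

    /* e.g.: if (xen_feature(XENFEAT_auto_translated_physmap)) ... */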
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2013, Citrix Systems R&D Ltd. + */ + +#ifndef __XEN_PUBLIC_GCOV_H__ +#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__ + +#define XENCOV_COUNTERS 5 +#define XENCOV_TAG_BASE 0x58544300u +#define XENCOV_TAG_FILE (XENCOV_TAG_BASE+0x46u) +#define XENCOV_TAG_FUNC (XENCOV_TAG_BASE+0x66u) +#define XENCOV_TAG_COUNTER(n) (XENCOV_TAG_BASE+0x30u+((n)&0xfu)) +#define XENCOV_TAG_END (XENCOV_TAG_BASE+0x2eu) +#define XENCOV_IS_TAG_COUNTER(n) \ + ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS)) +#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0)) + +/* + * The main structure for the blob is + * BLOB := FILE.. END + * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS + * FILENAME := LEN characters + * characters are padded to 32 bits + * LEN := 32 bit value + * COUNTERS := TAG_COUNTER(n) NUM COUNTER.. + * NUM := 32 bit value + * COUNTER := 64 bit value + * FUNCTIONS := TAG_FUNC NUM FUNCTION.. + * FUNCTION := IDENT CHECKSUM NUM_COUNTERS + * + * All tagged structures are aligned to 8 bytes + */ + +/** + * File information + * Prefixed with XENCOV_TAG_FILE and a string with filename + * Aligned to 8 bytes + */ +struct xencov_file +{ + uint32_t tag; /* XENCOV_TAG_FILE */ + uint32_t version; + uint32_t stamp; + uint32_t fn_len; + char filename[1]; +}; + + +/** + * Counters information + * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1) + * Aligned to 8 bytes + */ +struct xencov_counter +{ + uint32_t tag; /* XENCOV_TAG_COUNTER(n) */ + uint32_t num; + uint64_t values[1]; +}; + +/** + * Information for each function + * The number of counters equals the number of counter structures seen before + */ +struct xencov_function +{ + uint32_t ident; + uint32_t checksum; + uint32_t num_counters[1]; +}; + +/** + * Information for all functions + * Aligned to 8 bytes + */ +struct xencov_functions +{ + uint32_t tag; /* XENCOV_TAG_FUNC */ + uint32_t num; + struct xencov_function xencov_function[1]; +}; + +/** + * Terminator + */ +struct xencov_end +{ + uint32_t tag; /* XENCOV_TAG_END */ +}; + +#endif /* __XEN_PUBLIC_GCOV_H__ */ + diff -Nru xen-4.6.0/extras/mini-os/include/xen/grant_table.h xen-4.6.5/extras/mini-os/include/xen/grant_table.h --- xen-4.6.0/extras/mini-os/include/xen/grant_table.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/grant_table.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,682 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include "xen.h" + +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism for memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granter's memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD) + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes.
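The four-step recipe above (the invalidation cases continue below) maps directly onto code. A sketch for a v1 table, where gnttab is assumed to be the guest's mapped grant-entry array and wmb() is whatever write barrier the kernel provides (on x86 a compiler barrier suffices, per the NB above):

    extern grant_entry_v1_t *gnttab;   /* assumed: the mapped shared table */
    #define wmb() __asm__ __volatile__ ("" ::: "memory") /* x86: see NB */

    static void grant_page(grant_ref_t ref, domid_t who,
                           uint32_t frame, int readonly)
    {
        gnttab[ref].domid = who;       /* 1. write domid */
        gnttab[ref].frame = frame;     /* 2. write frame */
        wmb();                         /* 3. barrier before validating */
        gnttab[ref].flags = GTF_permit_access          /* 4. valid type */
                            | (readonly ? GTF_readonly : 0);
    }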
+ * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. 
[XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_permit_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped.
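Filling a v2 sub-page entry follows the same discipline of making the flags write last; a sketch (wmb() as in the earlier grant-table sketch):

    static void grant_sub_page(union grant_entry_v2 *e, domid_t who,
                               uint64_t frame, uint16_t off, uint16_t len)
    {
        e->sub_page.hdr.domid = who;
        e->sub_page.page_off = off;    /* grantee may only grant-copy   */
        e->sub_page.length = len;      /* bytes [off, off+len) ...      */
        e->sub_page.frame = frame;     /* ... of this frame; no mapping */
        wmb();
        e->sub_page.hdr.flags = GTF_permit_access | GTF_sub_page;
    }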
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
+
+/*
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+    grant_entry_header_t hdr;
+
+    /*
+     * This member is used for V1-style full page grants, where either:
+     *
+     * -- hdr.type is GTF_accept_transfer, or
+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+     *
+     * In that case, the frame field has the same semantics as the
+     * field of the same name in the V1 entry structure.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint32_t pad0;
+        uint64_t frame;
+    } full_page;
+
+    /*
+     * If the grant type is GTF_permit_access and GTF_sub_page is set,
+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
+     * in frame @frame.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint16_t page_off;
+        uint16_t length;
+        uint64_t frame;
+    } sub_page;
+
+    /*
+     * If the grant is GTF_transitive, @domid is allowed to use the
+     * grant @gref in domain @trans_domid, as if it was the local
+     * domain.  Obviously, the transitive access must be compatible
+     * with the original grant.
+     *
+     * The current version of Xen does not allow transitive grants
+     * to be mapped.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        domid_t trans_domid;
+        uint16_t pad0;
+        grant_ref_t gref;
+    } transitive;
+
+    uint32_t __spacer[4]; /* Pad to a power of two */
+};
+typedef union grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/* ` enum neg_errnoval
+ * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd,
+ * `                           void *args,
+ * `                           unsigned int count)
+ * `
+ *
+ * @args points to an array of a per-command data structure. The array
+ * has @count members.
+ */
+
+/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */
+#define GNTTABOP_map_grant_ref        0
+#define GNTTABOP_unmap_grant_ref      1
+#define GNTTABOP_setup_table          2
+#define GNTTABOP_dump_table           3
+#define GNTTABOP_transfer             4
+#define GNTTABOP_copy                 5
+#define GNTTABOP_query_size           6
+#define GNTTABOP_unmap_and_replace    7
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#define GNTTABOP_set_version          8
+#define GNTTABOP_get_status_frames    9
+#define GNTTABOP_get_version          10
+#define GNTTABOP_swap_grant_ref       11
+#define GNTTABOP_cache_flush          12
+#endif /* __XEN_INTERFACE_VERSION__ */
+/* ` } */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <status>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+struct gnttab_map_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint32_t flags;               /* GNTMAP_* */
+    grant_ref_t ref;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    grant_handle_t handle;
+    uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t dev_bus_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
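For reference, a typical round trip through the two operations above might look like the sketch below. It assumes the usual HYPERVISOR_grant_table_op() hypercall wrapper (returning 0 on success) and a page-aligned virtual address the guest has reserved; it is illustrative, not part of the interface.

/* Illustrative sketch only: map a foreign grant, use it, unmap it again. */
static int map_foreign_page(uint64_t map_va, domid_t dom, grant_ref_t ref)
{
    struct gnttab_map_grant_ref map = {
        .host_addr = map_va,
        .flags     = GNTMAP_host_map,
        .ref       = ref,
        .dom       = dom,
    };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1) ||
         map.status != GNTST_okay )
        return -1;

    /* ... access the granted frame through map_va ... */

    struct gnttab_unmap_grant_ref unmap = {
        .host_addr = map_va,
        .handle    = map.handle,  /* tracking number returned by the map */
    };
    HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
    return unmap.status == GNTST_okay ? 0 : -1;
}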
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+struct gnttab_setup_table {
+    /* IN parameters. */
+    domid_t  dom;
+    uint32_t nr_frames;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+    XEN_GUEST_HANDLE(ulong) frame_list;
+#else
+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
+#endif
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+struct gnttab_dump_table {
+    /* IN parameters. */
+    domid_t dom;
+    /* OUT parameters. */
+    int16_t status;               /* => enum grant_status */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <transfer_ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+struct gnttab_transfer {
+    /* IN parameters. */
+    xen_pfn_t   mfn;
+    domid_t     domid;
+    grant_ref_t ref;
+    /* OUT parameters. */
+    int16_t     status;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor based copy.
+ * Source and destinations can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref      (0)
+#define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref        (1)
+#define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
+
+struct gnttab_copy {
+    /* IN parameters. */
+    struct {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
+};
+typedef struct gnttab_copy gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
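A hedged example of driving GNTTABOP_copy: copy from a local frame into a grant the peer has offered writable. Only structures defined above are used; HYPERVISOR_grant_table_op() and DOMID_SELF come from the wider public interface, not this excerpt.

/* Illustrative sketch only: push 'len' bytes into a writable peer grant. */
static int copy_to_peer(xen_pfn_t local_gmfn, domid_t peer,
                        grant_ref_t peer_ref, uint16_t len)
{
    struct gnttab_copy op = {
        .source.u.gmfn = local_gmfn,
        .source.domid  = DOMID_SELF,
        .source.offset = 0,
        .dest.u.ref    = peer_ref,
        .dest.domid    = peer,
        .dest.offset   = 0,
        .len           = len,               /* must not cross a page boundary */
        .flags         = GNTCOPY_dest_gref, /* dest is a grant, source an MFN */
    };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_copy, &op, 1) )
        return -1;
    return op.status == GNTST_okay ? 0 : -1;
}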
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_query_size {
+    /* IN parameters. */
+    domid_t  dom;
+    /* OUT parameters. */
+    uint32_t nr_frames;
+    uint32_t max_nr_frames;
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>. <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+struct gnttab_set_version {
+    /* IN/OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specify the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    XEN_GUEST_HANDLE(uint64_t) frame_list;
+};
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+struct gnttab_get_version {
+    /* IN parameters */
+    domid_t dom;
+    uint16_t pad;
+    /* OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_get_version gnttab_get_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
+
+/*
+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
+ */
+struct gnttab_swap_grant_ref {
+    /* IN parameters */
+    grant_ref_t ref_a;
+    grant_ref_t ref_b;
+    /* OUT parameters */
+    int16_t status;               /* => enum grant_status */
+};
+typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t);
+
+/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+struct gnttab_cache_flush {
+    union {
+        uint64_t dev_bus_addr;
+        grant_ref_t ref;
+    } a;
+    uint16_t offset; /* offset from start of grant */
+    uint16_t length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN          (1<<0)
+#define GNTTAB_CACHE_INVAL          (1<<1)
+#define GNTTAB_CACHE_SOURCE_GREF    (1<<31)
+    uint32_t op;
+};
+typedef struct gnttab_cache_flush gnttab_cache_flush_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ */
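Selecting version 2 entries with GNTTABOP_set_version is a one-shot operation that must happen before any grants are activated, per the comment above. A minimal sketch, again assuming the HYPERVISOR_grant_table_op() wrapper:

/* Illustrative sketch only: opt in to version 2 grant entries. */
static int switch_to_grant_v2(void)
{
    struct gnttab_set_version sv = { .version = 2 };

    if ( HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1) )
        return -1;
    return sv.version == 2 ? 0 : -1; /* IN/OUT: reports the version in force */
}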
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map      (0)
+#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map        (1)
+#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly        (2)
+#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
+ /*
+  * GNTMAP_host_map subflag:
+  *  0 => The host mapping is usable only by the guest OS.
+  *  1 => The host mapping is usable by guest OS + current application.
+  */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
+
+ /*
+  * GNTMAP_contains_pte subflag:
+  *  0 => This map request contains a host virtual address.
+  *  1 => This map request contains the machine address of the PTE to update.
+  */
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
+
+#define _GNTMAP_can_fail        (5)
+#define GNTMAP_can_fail         (1<<_GNTMAP_can_fail)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+/* ` enum grant_status { */
+#define GNTST_okay              (0)  /* Normal return.                        */
+#define GNTST_general_error     (-1) /* General undefined error.              */
+#define GNTST_bad_domain        (-2) /* Unrecognised domain id.               */
+#define GNTST_bad_gntref        (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle        (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr     (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr      (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space   (-7) /* Out of space in I/O MMU.              */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.   */
+#define GNTST_bad_page          (-9) /* Specified page was invalid for op.    */
+#define GNTST_bad_copy_arg     (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big  (-11) /* transfer page address too large.      */
+#define GNTST_eagain           (-12) /* Operation not done; try again.
*/ +/* ` } */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/e820.h xen-4.6.5/extras/mini-os/include/xen/hvm/e820.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/e820.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/e820.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,34 @@ + +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_E820_H__ +#define __XEN_PUBLIC_HVM_E820_H__ + +/* E820 location in HVM virtual address space. */ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + +#endif /* __XEN_PUBLIC_HVM_E820_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_info_table.h xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_info_table.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_info_table.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_info_table.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,72 @@ +/****************************************************************************** + * hvm/hvm_info_table.h + * + * HVM parameter and information table, written into guest memory map. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ +#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ + +#define HVM_INFO_PFN 0x09F +#define HVM_INFO_OFFSET 0x800 +#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) + +/* Maximum we can support with current vLAPIC ID mapping. */ +#define HVM_MAX_VCPUS 128 + +struct hvm_info_table { + char signature[8]; /* "HVM INFO" */ + uint32_t length; + uint8_t checksum; + + /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ + uint8_t apic_mode; + + /* How many CPUs does this domain have? */ + uint32_t nr_vcpus; + + /* + * MEMORY MAP provided by HVM domain builder. + * Notes: + * 1. page_to_phys(x) = x << 12 + * 2. If a field is zero, the corresponding range does not exist. + */ + /* + * 0x0 to page_to_phys(low_mem_pgend)-1: + * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) + */ + uint32_t low_mem_pgend; + /* + * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: + * Reserved for special memory mappings + */ + uint32_t reserved_mem_pgstart; + /* + * 0x100000000 to page_to_phys(high_mem_pgend)-1: + * RAM above 4GB + */ + uint32_t high_mem_pgend; + + /* Bitmap of which CPUs are online at boot time. */ + uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; +}; + +#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_op.h xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_op.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_op.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_op.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,402 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +#include "../xen.h" +#include "../trace.h" +#include "../event_channel.h" + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ + HVMMEM_mmio_write_dm /* Read-only; writes go to the device model */ +} hvmmem_type_t; + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Track dirty VRAM. */ +#define HVMOP_track_dirty_vram 6 +struct xen_hvm_track_dirty_vram { + /* Domain to be tracked. */ + domid_t domid; + /* Number of pages to track. */ + uint32_t nr; + /* First pfn to track. */ + uint64_aligned_t first_pfn; + /* OUT variable. */ + /* Dirty bitmap buffer. */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; +}; +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); + +/* Notify that some pages got modified by the Device Model. */ +#define HVMOP_modified_memory 7 +struct xen_hvm_modified_memory { + /* Domain to be updated. */ + domid_t domid; + /* Number of pages. */ + uint32_t nr; + /* First pfn. */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); + +#define HVMOP_set_mem_type 8 +/* Notify that a region of memory is to be treated in a specific way. 
*/
+struct xen_hvm_set_mem_type {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_type;
+    /* Number of pages. */
+    uint32_t nr;
+    /* First pfn. */
+    uint64_aligned_t first_pfn;
+};
+typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying        9
+struct xen_hvm_pagetable_dying {
+    /* Domain with a pagetable about to be destroyed. */
+    domid_t  domid;
+    uint16_t pad[3]; /* align next field on 8-byte boundary */
+    /* guest physical address of the toplevel pagetable dying */
+    uint64_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
+
+/* Get the current Xen time, in nanoseconds since system boot. */
+#define HVMOP_get_time              10
+struct xen_hvm_get_time {
+    uint64_t now;      /* OUT */
+};
+typedef struct xen_hvm_get_time xen_hvm_get_time_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t);
+
+#define HVMOP_xentrace              11
+struct xen_hvm_xentrace {
+    uint16_t event, extra_bytes;
+    uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)];
+};
+typedef struct xen_hvm_xentrace xen_hvm_xentrace_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* Deprecated by XENMEM_access_op_set_access */
+#define HVMOP_set_mem_access        12
+
+/* Deprecated by XENMEM_access_op_get_access */
+#define HVMOP_get_mem_access        13
+
+#define HVMOP_inject_trap            14
+/* Inject a trap into a VCPU, which will get taken up on the next
+ * scheduling of it. Note that the caller should know enough of the
+ * state of the CPU before injecting, to know what the effect of
+ * injecting the trap will be.
+ */
+struct xen_hvm_inject_trap {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* VCPU */
+    uint32_t vcpuid;
+    /* Vector number */
+    uint32_t vector;
+    /* Trap type (HVMOP_TRAP_*) */
+    uint32_t type;
+/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */
+# define HVMOP_TRAP_ext_int    0 /* external interrupt */
+# define HVMOP_TRAP_nmi        2 /* nmi */
+# define HVMOP_TRAP_hw_exc     3 /* hardware exception */
+# define HVMOP_TRAP_sw_int     4 /* software interrupt (CD nn) */
+# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */
+# define HVMOP_TRAP_sw_exc     6 /* INT3 (CC), INTO (CE) */
+    /* Error code, or ~0u to skip */
+    uint32_t error_code;
+    /* Instruction length */
+    uint32_t insn_len;
+    /* CR2 for page faults */
+    uint64_aligned_t cr2;
+};
+typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#define HVMOP_get_mem_type    15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* OUT variable. */
+    uint16_t mem_type;
+    uint16_t pad[2]; /* align next field on 8-byte boundary */
+    /* IN variable. */
+    uint64_t pfn;
+};
+typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
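As an illustration of HVMOP_get_mem_type, a guest could query the type of one of its own frames roughly as follows; HYPERVISOR_hvm_op(cmd, arg) is the usual hypercall wrapper provided by the guest environment, not this header.

/* Illustrative sketch only: look up the hvmmem_type_t of a guest pfn. */
static int query_mem_type(uint64_t pfn, uint16_t *type_out)
{
    struct xen_hvm_get_mem_type op = {
        .domid = DOMID_SELF,
        .pfn   = pfn,
    };

    if ( HYPERVISOR_hvm_op(HVMOP_get_mem_type, &op) )
        return -1;
    *type_out = op.mem_type;  /* one of the hvmmem_type_t values above */
    return 0;
}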
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* MSI injection for emulated devices */
+#define HVMOP_inject_msi         16
+struct xen_hvm_inject_msi {
+    /* Domain to be injected */
+    domid_t   domid;
+    /* Data -- lower 32 bits */
+    uint32_t  data;
+    /* Address (0xfeexxxxx) */
+    uint64_t  addr;
+};
+typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
+
+/*
+ * IOREQ Servers
+ *
+ * The interface between an I/O emulator and Xen is called an IOREQ Server.
+ * A domain supports a single 'legacy' IOREQ Server which is instantiated if
+ * parameter...
+ *
+ * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous
+ * ioreq structures), or...
+ * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered
+ * ioreq ring), or...
+ * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses
+ * to request buffered I/O emulation).
+ *
+ * The following hypercalls facilitate the creation of IOREQ Servers for
+ * 'secondary' emulators which are invoked to implement port I/O, memory, or
+ * PCI config space ranges which they explicitly register.
+ */
+
+typedef uint16_t ioservid_t;
+
+/*
+ * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary
+ *                            emulator servicing domain <domid>.
+ *
+ * The <id> handed back is unique for <domid>. If <handle_bufioreq> is zero
+ * the buffered ioreq ring will not be allocated and hence all emulation
+ * requests to this server will be synchronous.
+ */
+#define HVMOP_create_ioreq_server 17
+struct xen_hvm_create_ioreq_server {
+    domid_t domid;           /* IN - domain to be serviced */
+    uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
+    ioservid_t id;           /* OUT - server id */
+};
+typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_create_ioreq_server_t);
+
+/*
+ * HVMOP_get_ioreq_server_info: Get all the information necessary to access
+ *                              IOREQ Server <id>.
+ *
+ * The emulator needs to map the synchronous ioreq structures and buffered
+ * ioreq ring (if it exists) that Xen uses to request emulation. These are
+ * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
+ * respectively. In addition, if the IOREQ Server is handling buffered
+ * emulation requests, the emulator needs to bind to event channel
+ * <bufioreq_port> to listen for them. (The event channels used for
+ * synchronous emulation requests are specified in the per-CPU ioreq
+ * structures in <ioreq_pfn>).
+ * If the IOREQ Server is not handling buffered emulation requests then the
+ * values handed back in <bufioreq_pfn> and <bufioreq_port> will both be 0.
+ */
+#define HVMOP_get_ioreq_server_info 18
+struct xen_hvm_get_ioreq_server_info {
+    domid_t domid;                 /* IN - domain to be serviced */
+    ioservid_t id;                 /* IN - server id */
+    evtchn_port_t bufioreq_port;   /* OUT - buffered ioreq port */
+    uint64_aligned_t ioreq_pfn;    /* OUT - sync ioreq pfn */
+    uint64_aligned_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
+};
+typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t);
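Putting the two hypercalls above together, a secondary emulator's setup might look like this sketch (HYPERVISOR_hvm_op() assumed, error handling abbreviated):

/* Illustrative sketch only: create a server, then fetch its access info. */
static int start_ioreq_server(domid_t domid, ioservid_t *id_out)
{
    struct xen_hvm_create_ioreq_server create = {
        .domid           = domid,
        .handle_bufioreq = 1,   /* 0 => all emulation requests synchronous */
    };
    if ( HYPERVISOR_hvm_op(HVMOP_create_ioreq_server, &create) )
        return -1;

    struct xen_hvm_get_ioreq_server_info info = {
        .domid = domid,
        .id    = create.id,
    };
    if ( HYPERVISOR_hvm_op(HVMOP_get_ioreq_server_info, &info) )
        return -1;

    /* info.ioreq_pfn / info.bufioreq_pfn can now be mapped, and
     * info.bufioreq_port bound, by the emulator. */
    *id_out = create.id;
    return 0;
}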
+/*
+ * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain <domid>
+ *                                   for emulation by the client of IOREQ
+ *                                   Server <id>
+ * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of <domid>
+ *                                       for emulation by the client of IOREQ
+ *                                       Server <id>
+ *
+ * There are three types of I/O that can be emulated: port I/O, memory accesses
+ * and PCI config space accesses. The <type> field denotes which type of range
+ * the <start> and <end> (inclusive) fields are specifying.
+ * PCI config space ranges are specified by segment/bus/device/function values
+ * which should be encoded using the HVMOP_PCI_SBDF helper macro below.
+ *
+ * NOTE: unless an emulation request falls entirely within a range mapped
+ * by a secondary emulator, it will not be passed to that emulator.
+ */
+#define HVMOP_map_io_range_to_ioreq_server 19
+#define HVMOP_unmap_io_range_from_ioreq_server 20
+struct xen_hvm_io_range {
+    domid_t domid;               /* IN - domain to be serviced */
+    ioservid_t id;               /* IN - server id */
+    uint32_t type;               /* IN - type of range */
+# define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
+# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
+# define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range */
+    uint64_aligned_t start, end; /* IN - inclusive start and end of range */
+};
+typedef struct xen_hvm_io_range xen_hvm_io_range_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_io_range_t);
+
+#define HVMOP_PCI_SBDF(s,b,d,f)                 \
+    ((((s) & 0xffff) << 16) |                   \
+     (((b) & 0xff) << 8) |                      \
+     (((d) & 0x1f) << 3) |                      \
+     ((f) & 0x07))
+
+/*
+ * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id> servicing domain
+ *                             <domid>.
+ *
+ * Any registered I/O ranges will be automatically deregistered.
+ */
+#define HVMOP_destroy_ioreq_server 21
+struct xen_hvm_destroy_ioreq_server {
+    domid_t domid; /* IN - domain to be serviced */
+    ioservid_t id; /* IN - server id */
+};
+typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_destroy_ioreq_server_t);
+
+/*
+ * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id>
+ *                               servicing domain <domid>.
+ *
+ * The IOREQ Server will not be passed any emulation requests until it is in
+ * the enabled state.
+ * Note that the contents of the ioreq_pfn and bufioreq_pfn (see
+ * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in
+ * the enabled state.
+ */
+#define HVMOP_set_ioreq_server_state 22
+struct xen_hvm_set_ioreq_server_state {
+    domid_t domid;   /* IN - domain to be serviced */
+    ioservid_t id;   /* IN - server id */
+    uint8_t enabled; /* IN - enabled? */
+};
+typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
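Continuing the sketch from above: registering a PCI config space range and flipping the server to the enabled state so requests start flowing. The SBDF chosen (0000:00:04.0) is purely illustrative.

/* Illustrative sketch only: claim one PCI device, then enable the server. */
static int claim_pci_device(domid_t domid, ioservid_t id)
{
    struct xen_hvm_io_range range = {
        .domid = domid,
        .id    = id,
        .type  = HVMOP_IO_RANGE_PCI,
        .start = HVMOP_PCI_SBDF(0, 0, 4, 0),
        .end   = HVMOP_PCI_SBDF(0, 0, 4, 0),  /* inclusive range */
    };
    if ( HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server, &range) )
        return -1;

    struct xen_hvm_set_ioreq_server_state state = {
        .domid   = domid,
        .id      = id,
        .enabled = 1,
    };
    return HYPERVISOR_hvm_op(HVMOP_set_ioreq_server_state, &state) ? -1 : 0;
}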
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * HVMOP_set_evtchn_upcall_vector: Set a <vector> that should be used for event
+ *                                 channel upcalls on the specified <vcpu>. If
+ *                                 set, this vector will be used in preference
+ *                                 to the domain global callback (see
+ *                                 HVM_PARAM_CALLBACK_IRQ).
+ */
+#define HVMOP_set_evtchn_upcall_vector 23
+struct xen_hvm_evtchn_upcall_vector {
+    uint32_t vcpu;
+    uint8_t vector;
+};
+typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t);
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_xs_strings.h xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_xs_strings.h
--- xen-4.6.0/extras/mini-os/include/xen/hvm/hvm_xs_strings.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/hvm/hvm_xs_strings.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,80 @@
+/******************************************************************************
+ * hvm/hvm_xs_strings.h
+ *
+ * HVM xenstore strings used in HVMLOADER.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+
+#define HVM_XS_HVMLOADER             "hvmloader"
+#define HVM_XS_BIOS                  "hvmloader/bios"
+#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address"
+#define HVM_XS_ALLOW_MEMORY_RELOCATE "hvmloader/allow-memory-relocate"
+
+/* The following values allow additional ACPI tables to be added to the
+ * virtual ACPI BIOS that hvmloader constructs. The values specify the guest
+ * physical address and length of a block of ACPI tables to add. The format of
+ * the block is simply concatenated raw tables (which specify their own length
+ * in the ACPI header).
+ */
+#define HVM_XS_ACPI_PT_ADDRESS       "hvmloader/acpi/address"
+#define HVM_XS_ACPI_PT_LENGTH        "hvmloader/acpi/length"
+
+/* Any number of SMBIOS types can be passed through to an HVM guest using
+ * the following xenstore values. The values specify the guest physical
+ * address and length of a block of SMBIOS structures for hvmloader to use.
+ * The block is formatted in the following way:
+ *
+ * <length><struct><length><struct>...
+ *
+ * Each length separator is a 32b integer indicating the length of the next
+ * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct
+ * will replace the default structure in hvmloader. In addition, any
+ * OEM/vendor types (128 - 255) will all be added.
+ */ +#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address" +#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length" + +/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */ +#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery" + +/* The following xenstore values are used to override some of the default + * string values in the SMBIOS table constructed in hvmloader. + */ +#define HVM_XS_BIOS_STRINGS "bios-strings" +#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor" +#define HVM_XS_BIOS_VERSION "bios-strings/bios-version" +#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer" +#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name" +#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version" +#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number" +#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer" +#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number" +#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer" +#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name" + +/* 1 to 99 OEM strings can be set in xenstore using values of the form + * below. These strings will be loaded into the SMBIOS type 11 structure. + */ +#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" + +#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/ioreq.h xen-4.6.5/extras/mini-os/include/xen/hvm/ioreq.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/ioreq.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/ioreq.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,129 @@ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq. 
+ * + * For I/O type IOREQ_TYPE_PCI_CONFIG, the physical address is formatted + * as follows: + * + * 63....48|47..40|39..35|34..32|31........0 + * SEGMENT |BUS |DEV |FN |OFFSET + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t _pad1:1; + uint8_t type; /* I/O type */ +}; +typedef struct ioreq ioreq_t; + +struct shared_iopage { + struct ioreq vcpu_ioreq[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +struct buf_ioreq { + uint8_t type; /* I/O type */ + uint8_t pad:1; + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ + uint32_t addr:20;/* physical address */ + uint32_t data; /* data */ +}; +typedef struct buf_ioreq buf_ioreq_t; + +#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ +struct buffered_iopage { + unsigned int read_pointer; + unsigned int write_pointer; + buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. */ +typedef struct buffered_iopage buffered_iopage_t; + +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* Version 0 (default): Traditional Xen locations. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). 
*/ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/params.h xen-4.6.5/extras/mini-os/include/xen/hvm/params.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/params.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/params.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,199 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include "hvm_op.h" + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 + +#if defined(__i386__) || defined(__x86_64__) + +/* + * Viridian enlightenments + * + * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx) + * + * To expose viridian enlightenments to the guest set this parameter + * to the desired feature mask. The base feature set must be present + * in any valid feature mask. 
+ */ +#define HVM_PARAM_VIRIDIAN 9 + +/* Base+Freq viridian feature sets: + * + * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) + * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) + * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX) + * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY) + */ +#define _HVMPV_base_freq 0 +#define HVMPV_base_freq (1 << _HVMPV_base_freq) + +/* Feature set modifications */ + +/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY). + * This modification restores the viridian feature set to the + * original 'base' set exposed in releases prior to Xen 4.4. + */ +#define _HVMPV_no_freq 1 +#define HVMPV_no_freq (1 << _HVMPV_no_freq) + +/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */ +#define _HVMPV_time_ref_count 2 +#define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) + +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ +#define _HVMPV_reference_tsc 3 +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) + +#define HVMPV_feature_mask \ + (HVMPV_base_freq | \ + HVMPV_no_freq | \ + HVMPV_time_ref_count | \ + HVMPV_reference_tsc) + +#endif + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +/* + * Select location of ACPI PM1a and TMR control blocks. 
Currently two locations
+ * are supported, specified by version 0 or 1 in this parameter:
+ *   - 0: default, use the old addresses
+ *        PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48
+ *   - 1: use the new default qemu addresses
+ *        PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008
+ * You can find these address definitions in <hvm/ioreq.h>
+ */
+#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
+
+/* Enable blocking memory events, async or sync (pause vcpu until response);
+ * onchangeonly indicates messages only on a change of value */
+#define HVM_PARAM_MEMORY_EVENT_CR0          20
+#define HVM_PARAM_MEMORY_EVENT_CR3          21
+#define HVM_PARAM_MEMORY_EVENT_CR4          22
+#define HVM_PARAM_MEMORY_EVENT_INT3         23
+#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP  25
+#define HVM_PARAM_MEMORY_EVENT_MSR          30
+
+#define HVMPME_MODE_MASK       (3 << 0)
+#define HVMPME_mode_disabled   0
+#define HVMPME_mode_async      1
+#define HVMPME_mode_sync       2
+#define HVMPME_onchangeonly    (1 << 2)
+
+/* Boolean: Enable nestedhvm (hvm only) */
+#define HVM_PARAM_NESTEDHVM    24
+
+/* Params for the mem event rings */
+#define HVM_PARAM_PAGING_RING_PFN   27
+#define HVM_PARAM_ACCESS_RING_PFN   28
+#define HVM_PARAM_SHARING_RING_PFN  29
+
+/* SHUTDOWN_* action in case of a triple fault */
+#define HVM_PARAM_TRIPLE_FAULT_REASON 31
+
+#define HVM_PARAM_IOREQ_SERVER_PFN 32
+#define HVM_PARAM_NR_IOREQ_SERVER_PAGES 33
+
+/* Location of the VM Generation ID in guest physical address space. */
+#define HVM_PARAM_VM_GENERATION_ID_ADDR 34
+
+#define HVM_NR_PARAMS          35
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/pvdrivers.h xen-4.6.5/extras/mini-os/include/xen/hvm/pvdrivers.h
--- xen-4.6.0/extras/mini-os/include/xen/hvm/pvdrivers.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/hvm/pvdrivers.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,49 @@
+/*
+ * pvdrivers.h: Register of PV drivers product numbers.
+ * Copyright (c) 2012, Citrix Systems Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_PVDRIVERS_H_
+#define _XEN_PUBLIC_PVDRIVERS_H_
+
+/*
+ * This is the master registry of product numbers for
+ * PV drivers.
+ * If you need a new product number allocated, please
+ * post to xen-devel@lists.xensource.com. You should NOT use
+ * a product number without allocating one.
+ * If you maintain a separate versioning and distribution path + * for PV drivers you should have a separate product number so + * that your drivers can be separated from others. + * + * During development, you may use the product ID to + * indicate a driver which is yet to be released. + */ + +#define PVDRIVERS_PRODUCT_LIST(EACH) \ + EACH("xensource-windows", 0x0001) /* Citrix */ \ + EACH("gplpv-windows", 0x0002) /* James Harper */ \ + EACH("linux", 0x0003) \ + EACH("xenserver-windows-v7.0+", 0x0004) /* Citrix */ \ + EACH("xenserver-windows-v7.2+", 0x0005) /* Citrix */ \ + EACH("experimental", 0xffff) + +#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/hvm/save.h xen-4.6.5/extras/mini-os/include/xen/hvm/save.h --- xen-4.6.0/extras/mini-os/include/xen/hvm/save.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/hvm/save.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,111 @@ +/* + * hvm/save.h + * + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_H__ +#define __XEN_PUBLIC_HVM_SAVE_H__ + +/* + * Structures in this header *must* have the same layout in 32bit + * and 64bit environments: this means that all fields must be explicitly + * sized types and aligned to their sizes, and the structs must be + * a multiple of eight bytes long. + * + * Only the state necessary for saving and restoring (i.e. fields + * that are analogous to actual hardware state) should go in this file. + * Internal mechanisms should be kept in Xen-private headers. + */ + +#if !defined(__GNUC__) || defined(__STRICT_ANSI__) +#error "Anonymous structs/unions are a GNU extension." +#endif + +/* + * Each entry is preceded by a descriptor giving its type and length + */ +struct hvm_save_descriptor { + uint16_t typecode; /* Used to demux the various types below */ + uint16_t instance; /* Further demux within a type */ + uint32_t length; /* In bytes, *not* including this descriptor */ +}; + + +/* + * Each entry has a datatype associated with it: for example, the CPU state + * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), + * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). + * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system + * ugliness. 
+ */
+
+#ifdef __XEN__
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix)     \
+    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \
+    struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; }
+
+# include <xen/lib.h> /* BUG() */
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                         \
+    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \
+    struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; }
+#else
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix)     \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}
+
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                         \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}
+#endif
+
+#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t)
+#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x)))
+#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c))
+
+#ifdef __XEN__
+# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t)
+# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x)))
+
+# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1)
+# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst)
+#endif
+
+/*
+ * The series of save records is terminated by a zero-type, zero-length
+ * descriptor.
+ */
+
+struct hvm_save_end {};
+DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../arch-x86/hvm/save.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "../arch-arm/hvm/save.h"
+#else
+#error "unsupported architecture"
+#endif
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
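A consumer of this save format can rely on exactly two invariants spelled out above: every entry is preceded by a descriptor, and the series ends with a zero-type, zero-length descriptor. A minimal, illustrative walker:

#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch only: iterate over a buffer of save records. */
static void walk_save_records(const uint8_t *buf, size_t len)
{
    size_t off = 0;

    while ( off + sizeof(struct hvm_save_descriptor) <= len )
    {
        const struct hvm_save_descriptor *d =
            (const struct hvm_save_descriptor *)(buf + off);

        if ( d->typecode == HVM_SAVE_CODE(END) )
            break;            /* zero-type terminating descriptor reached */

        /* d->length bytes of typed state follow the descriptor itself. */
        off += sizeof(*d) + d->length;
    }
}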
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formatted node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free formatted string providing sufficient information for the
+ *      backend driver to open the backing device. (e.g. the path to the
+ *      file or block device representing the backing store.)
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug. See:
+ *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug. This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant.
+ *      A backend which knows that it is not affected by the bug can
+ *      ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST an LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using an LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages. The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ *      Values:         0/1 (boolean)
+ *      Default Value:  1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer (or not to offer) discard to the frontend. If the property
+ *      is missing the backend should offer discard if the backing storage
+ *      actually supports it.
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          10
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device. The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical sector size, in bytes, of the backend device.
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The physical sector size, in bytes, of the backend device.
+ *
+ * sectors
+ *      Values:         <uint64_t>
+ *
+ *      The size of the backend device, expressed in units of its logical
+ *      sector size ("sector-size").
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u". Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages. The value must be a power of 2.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fall back
+ *      to non-persistent mode.
+ *      This will cause a performance degradation,
+ *      since the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain. (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *     PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *     including a distribution deployed on certain nodes of the Amazon
+ *     EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
+ *     For full interoperability, block front and backends should publish
+ *     identical ring parameters, adjusted for unit differences, to the
+ *     XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ *     (discardable extents) in units that are larger than the exported logical
+ *     block size. If the backing device has such discardable extents the
+ *     backend should provide both discard-granularity and discard-alignment.
+ *     Providing just one of the two may be considered an error by the frontend.
+ *     Backends supporting discard should include discard-granularity and
+ *     discard-alignment even if they support discarding individual sectors.
+ *     Frontends should assume discard-alignment == 0 and discard-granularity
+ *     == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ *     partitioned into virtual devices that do not necessarily begin or
+ *     end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ *     'ring-ref' is used to communicate the grant reference for this
+ *     page to the backend. When using a multi-page ring, the 'ring-ref'
+ *     node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of doing this copy,
+ *     however, doesn't cancel out the speed improvement of not having to
+ *     unmap the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) The Linux implementation doesn't have a limit on the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontend driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. 
+ * If the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional. See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD           5
+
+/*
+ * Recognized if "feature-max-indirect-segments" is present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. first_sect and last_sect in blkif_request_segment, as well as
+ * sector_number in blkif_request, are always expressed in 512-byte units.
+ * However they must be properly aligned to the real sector size of the
+ * physical disk, which is reported in the "physical-sector-size" node in
+ * the backend xenbus info. Also the xenbus "sectors" node is expressed in
+ * 512-byte units.
+ */
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+    uint8_t        operation;    /* BLKIF_OP_???
*/ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + uint8_t operation; /* BLKIF_OP_DISCARD */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to discard*/ +}; +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_request_indirect { + uint8_t operation; /* BLKIF_OP_INDIRECT */ + uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ + uint16_t nr_segments; /* number of segments */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + uint64_t pad; /* Make it 64 byte aligned on i386 */ +#endif +}; +typedef struct blkif_request_indirect blkif_request_indirect_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/console.h xen-4.6.5/extras/mini-os/include/xen/io/console.h --- xen-4.6.0/extras/mini-os/include/xen/io/console.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/console.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,51 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/fbif.h xen-4.6.5/extras/mini-os/include/xen/io/fbif.h --- xen-4.6.0/extras/mini-os/include/xen/io/fbif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/fbif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,176 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. 
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;       /* source x */
+    int32_t y;       /* source y */
+    int32_t width;   /* rect width */
+    int32_t height;  /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend of its preferred period of
+ * refresh. Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it. Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodic refreshes.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_REFRESH_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+     * 64 bit. 256 directories give enough room for a 512 Meg
+     * framebuffer with a max resolution of 12,800x10,240.
Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +/* + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. + */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/fsif.h xen-4.6.5/extras/mini-os/include/xen/io/fsif.h --- xen-4.6.0/extras/mini-os/include/xen/io/fsif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/fsif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,192 @@ +/****************************************************************************** + * fsif.h + * + * Interface to FS level split device drivers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007, Grzegorz Milos, . + */ + +#ifndef __XEN_PUBLIC_IO_FSIF_H__ +#define __XEN_PUBLIC_IO_FSIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +#define REQ_FILE_OPEN 1 +#define REQ_FILE_CLOSE 2 +#define REQ_FILE_READ 3 +#define REQ_FILE_WRITE 4 +#define REQ_STAT 5 +#define REQ_FILE_TRUNCATE 6 +#define REQ_REMOVE 7 +#define REQ_RENAME 8 +#define REQ_CREATE 9 +#define REQ_DIR_LIST 10 +#define REQ_CHMOD 11 +#define REQ_FS_SPACE 12 +#define REQ_FILE_SYNC 13 + +struct fsif_open_request { + grant_ref_t gref; +}; + +struct fsif_close_request { + uint32_t fd; +}; + +struct fsif_read_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_write_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_stat_request { + uint32_t fd; +}; + +/* This structure is a copy of some fields from stat structure, returned + * via the ring. 
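+ * (Illustrative note, assuming the conventional POSIX correspondence:
+ * stat_mode, stat_uid, stat_gid, stat_size, stat_atime, stat_mtime and
+ * stat_ctime mirror st_mode, st_uid, st_gid, st_size, st_atime,
+ * st_mtime and st_ctime of struct stat, while stat_ret carries the
+ * return value of the stat operation itself.)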
*/ +struct fsif_stat_response { + int32_t stat_mode; + uint32_t stat_uid; + uint32_t stat_gid; + int32_t stat_ret; + int64_t stat_size; + int64_t stat_atime; + int64_t stat_mtime; + int64_t stat_ctime; +}; + +struct fsif_truncate_request { + uint32_t fd; + int32_t pad; + int64_t length; +}; + +struct fsif_remove_request { + grant_ref_t gref; +}; + +struct fsif_rename_request { + uint16_t old_name_offset; + uint16_t new_name_offset; + grant_ref_t gref; +}; + +struct fsif_create_request { + int8_t directory; + int8_t pad; + int16_t pad2; + int32_t mode; + grant_ref_t gref; +}; + +struct fsif_list_request { + uint32_t offset; + grant_ref_t gref; +}; + +#define NR_FILES_SHIFT 0 +#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */ +#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT) +#define ERROR_SIZE 32 /* 32 bits for the error mask */ +#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT) +#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT) +#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE) +#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT) + +struct fsif_chmod_request { + uint32_t fd; + int32_t mode; +}; + +struct fsif_space_request { + grant_ref_t gref; +}; + +struct fsif_sync_request { + uint32_t fd; +}; + + +/* FS operation request */ +struct fsif_request { + uint8_t type; /* Type of the request */ + uint8_t pad; + uint16_t id; /* Request ID, copied to the response */ + uint32_t pad2; + union { + struct fsif_open_request fopen; + struct fsif_close_request fclose; + struct fsif_read_request fread; + struct fsif_write_request fwrite; + struct fsif_stat_request fstat; + struct fsif_truncate_request ftruncate; + struct fsif_remove_request fremove; + struct fsif_rename_request frename; + struct fsif_create_request fcreate; + struct fsif_list_request flist; + struct fsif_chmod_request fchmod; + struct fsif_space_request fspace; + struct fsif_sync_request fsync; + } u; +}; +typedef struct fsif_request fsif_request_t; + +/* FS operation response */ +struct fsif_response { + uint16_t id; + uint16_t pad1; + uint32_t pad2; + union { + uint64_t ret_val; + struct fsif_stat_response fstat; + } u; +}; + +typedef struct fsif_response fsif_response_t; + +#define FSIF_RING_ENTRY_SIZE 64 + +#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \ + sizeof(grant_ref_t) + 1) +#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \ + sizeof(grant_ref_t) + 1) + +DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response); + +#define STATE_INITIALISED "init" +#define STATE_READY "ready" +#define STATE_CLOSING "closing" +#define STATE_CLOSED "closed" + + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/kbdif.h xen-4.6.5/extras/mini-os/include/xen/io/kbdif.h --- xen-4.6.0/extras/mini-os/include/xen/io/kbdif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/kbdif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,132 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION 1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY 3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-pointer in xenstore.
+ */
+#define XENKBD_TYPE_POS 4
+
+struct xenkbd_motion
+{
+    uint8_t type;     /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;    /* relative X motion */
+    int32_t rel_y;    /* relative Y motion */
+    int32_t rel_z;    /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+    uint8_t type;     /* XENKBD_TYPE_KEY */
+    uint8_t pressed;  /* 1 if pressed; 0 otherwise */
+    uint32_t keycode; /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+    uint8_t type;     /* XENKBD_TYPE_POS */
+    int32_t abs_x;    /* absolute X position (in FB pixels) */
+    int32_t abs_y;    /* absolute Y position (in FB pixels) */
+    int32_t rel_z;    /* relative Z motion (wheel) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/libxenvchan.h xen-4.6.5/extras/mini-os/include/xen/io/libxenvchan.h --- xen-4.6.0/extras/mini-os/include/xen/io/libxenvchan.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/libxenvchan.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,97 @@ +/** + * @file + * @section AUTHORS + * + * Copyright (C) 2010 Rafal Wojtczuk + * + * Authors: + * Rafal Wojtczuk + * Daniel De Graaf + * + * @section LICENSE + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * @section DESCRIPTION + * + * Originally borrowed from the Qubes OS Project, http://www.qubes-os.org, + * this code has been substantially rewritten to use the gntdev and gntalloc + * devices instead of raw MFNs and map_foreign_range. + * + * This is a library for inter-domain communication. A standard Xen ring + * buffer is used, with a datagram-based interface built on top. The grant + * reference and event channels are shared in XenStore under a user-specified + * path. + * + * The ring.h macros define an asymmetric interface to a shared data structure + * that assumes all rings reside in a single contiguous memory space. This is + * not suitable for vchan because the interface to the ring is symmetric except + * for the setup. Unlike the producer-consumer rings defined in ring.h, the + * size of the rings used in vchan are determined at execution time instead of + * compile time, so the macros in ring.h cannot be used to access the rings. 
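+ *
+ * As an illustrative example (the numbers follow from the layout
+ * documented on the vchan_interface fields below): with left_order = 10
+ * and right_order = 11, the left ring occupies bytes 1024-2047 of the
+ * shared page and the right ring occupies bytes 2048-4095; an order of
+ * 12 or more instead describes a multi-page ring of 2^order bytes,
+ * using 2^(order-12) grant references from the grants[] list.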
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+
+struct ring_shared {
+    uint32_t cons, prod;
+};
+
+#define VCHAN_NOTIFY_WRITE 0x1
+#define VCHAN_NOTIFY_READ 0x2
+
+/**
+ * vchan_interface: primary shared data structure
+ */
+struct vchan_interface {
+    /**
+     * Standard consumer/producer interface, one pair per buffer
+     * left is client write, server read
+     * right is client read, server write
+     */
+    struct ring_shared left, right;
+    /**
+     * size of the rings, which determines their location
+     * 10   - at offset 1024 in ring's page
+     * 11   - at offset 2048 in ring's page
+     * 12+  - uses 2^(N-12) grants to describe the multi-page ring
+     * These should remain constant once the page is shared.
+     * Only one of the two orders can be 10 (or 11).
+     */
+    uint16_t left_order, right_order;
+    /**
+     * Shutdown detection:
+     *  0: client (or server) has exited
+     *  1: client (or server) is connected
+     *  2: client has not yet connected
+     */
+    uint8_t cli_live, srv_live;
+    /**
+     * Notification bits:
+     *  VCHAN_NOTIFY_WRITE: send notify when data is written
+     *  VCHAN_NOTIFY_READ: send notify when data is read (consumed)
+     * cli_notify is used for the client to inform the server of its action
+     */
+    uint8_t cli_notify, srv_notify;
+    /**
+     * Grant list: ordering is left, right. Must not extend into actual ring
+     * or grow beyond the end of the initial shared page.
+     * These should remain constant once the page is shared, to allow
+     * for possible remapping by a client that restarts.
+     */
+    uint32_t grants[0];
+};
+
diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/netif.h xen-4.6.5/extras/mini-os/include/xen/io/netif.h
--- xen-4.6.0/extras/mini-os/include/xen/io/netif.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/io/netif.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,305 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. 
+ * For example, a frontend with two queues and split event channels must
+ * write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
+ */
+#ifndef __XEN_PCI_COMMON_H__
+#define __XEN_PCI_COMMON_H__
+
+/* Be sure to bump this number if you change this file */
+#define XEN_PCI_MAGIC "7"
+
+/* xen_pci_sharedinfo flags */
+#define _XEN_PCIF_active (0)
+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
+#define _XEN_PCIB_AERHANDLER (1)
+#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
+#define _XEN_PCIB_active (2)
+#define XEN_PCIB_active (1<<_XEN_PCIB_active)
+
+/* xen_pci_op commands */
+#define XEN_PCI_OP_conf_read (0)
+#define XEN_PCI_OP_conf_write (1)
+#define XEN_PCI_OP_enable_msi (2)
+#define XEN_PCI_OP_disable_msi (3)
+#define XEN_PCI_OP_enable_msix (4)
+#define XEN_PCI_OP_disable_msix (5)
+#define XEN_PCI_OP_aer_detected (6)
+#define XEN_PCI_OP_aer_resume (7)
+#define XEN_PCI_OP_aer_mmio (8)
+#define XEN_PCI_OP_aer_slotreset (9)
+#define XEN_PCI_OP_enable_multi_msi (10)
+
+/* xen_pci_op error numbers */
+#define XEN_PCI_ERR_success (0)
+#define XEN_PCI_ERR_dev_not_found (-1)
+#define XEN_PCI_ERR_invalid_offset (-2)
+#define XEN_PCI_ERR_access_denied (-3)
+#define XEN_PCI_ERR_not_implemented (-4)
+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
+#define XEN_PCI_ERR_op_failed (-5)
+
+/*
+ * it should be (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct xen_msix_entry)
+ * Should not exceed 128
+ */
+#define SH_INFO_MAX_VEC 128
+
+struct xen_msix_entry {
+    uint16_t vector;
+    uint16_t entry;
+};
+struct xen_pci_op {
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
+
+    /* OUT: will contain an error number (if any) from errno.h */
+    int32_t err;
+
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
+
+    /* IN: which configuration registers to touch */
+    int32_t offset;
+    int32_t size;
+
+    /* IN/OUT: Contains the result after a READ or the value to WRITE */
+    uint32_t value;
+    /* IN: Contains extra info for this operation */
+    uint32_t info;
+    /* IN: param for msi-x */
+    struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
+};
+
+/* used for pcie aer handling */
+struct xen_pcie_aer_op
+{
+
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
+    /* IN/OUT: return aer_op result or carry error_detected state as input */
+    int32_t err;
+
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
+};
+struct xen_pci_sharedinfo {
+    /* flags - XEN_PCIF_* */
+    uint32_t flags;
+    struct xen_pci_op op;
+    struct xen_pcie_aer_op aer_op;
+};
+
+#endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/protocols.h xen-4.6.5/extras/mini-os/include/xen/io/protocols.h
--- xen-4.6.0/extras/mini-os/include/xen/io/protocols.h 1970-01-01 00:00:00.000000000 +0000
+++ 
xen-4.6.5/extras/mini-os/include/xen/io/protocols.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,40 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/ring.h xen-4.6.5/extras/mini-os/include/xen/io/ring.h --- xen-4.6.0/extras/mini-os/include/xen/io/ring.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/ring.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,312 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. 
+ */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). 
To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? 
req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. 
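+ *
+ * As a non-normative sketch of how these macros combine (back_ring is
+ * the mytag_back_ring_t from the example above; process_request() is a
+ * hypothetical handler, not defined by this header), a back end might
+ * consume requests as follows, with xen_rmb() ensuring the request
+ * contents are read only after req_prod:
+ *
+ *     int work_to_do;
+ *     do {
+ *         RING_IDX cons = back_ring.req_cons;
+ *         RING_IDX prod = back_ring.sring->req_prod;
+ *         xen_rmb();
+ *         while (cons != prod) {
+ *             request_t *req = RING_GET_REQUEST(&back_ring, cons);
+ *             process_request(req);
+ *             cons++;
+ *         }
+ *         back_ring.req_cons = cons;
+ *         RING_FINAL_CHECK_FOR_REQUESTS(&back_ring, work_to_do);
+ *     } while (work_to_do);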
+ */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/tpmif.h xen-4.6.5/extras/mini-os/include/xen/io/tpmif.h --- xen-4.6.0/extras/mini-os/include/xen/io/tpmif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/tpmif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,143 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ * Copyright (c) 2005, IBM Corporation
+ *
+ * Author: Stefan Berger, stefanb@us.ibm.com
+ * Grant table support: Mahadevan Gomathisankaran
+ *
+ * This code has been derived from tools/libxc/xen/io/netif.h
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+#include "../grant_table.h"
+
+struct tpmif_tx_request {
+    unsigned long addr;   /* Machine address of packet.   */
+    grant_ref_t ref;      /* grant table access reference */
+    uint16_t unused;
+    uint16_t size;        /* Packet size in bytes.        */
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
+
+/*
+ * The TPMIF_TX_RING_SIZE defines the number of pages the
+ * front-end and backend can exchange (= size of array).
+ */
+typedef uint32_t TPMIF_RING_IDX;
+
+#define TPMIF_TX_RING_SIZE 1
+
+/* This structure must fit in a memory page. */
+
+struct tpmif_ring {
+    struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
+
+struct tpmif_tx_interface {
+    struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
+
+/******************************************************************************
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * Author: Daniel De Graaf
+ *
+ * This protocol emulates the request/response behavior of a TPM using a Xen
+ * shared memory interface. All interaction with the TPM is at the direction
+ * of the frontend, since a TPM (hardware or virtual) is a passive device -
+ * the backend only processes commands as requested by the frontend.
+ *
+ * The frontend sends a request to the TPM by populating the shared page with
+ * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending
+ * an event channel notification. When the backend is finished, it will set
+ * the state to TPMIF_STATE_FINISH and send an event channel notification.
+ *
+ * In order to allow long-running commands to be canceled, the frontend can
+ * at any time change the state to TPMIF_STATE_CANCEL and send a notification.
+ * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH)
+ * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM
+ * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH
+ * if another reason for cancellation is required - for example, a physical
+ * TPM may cancel a command if the interface is seized by another locality.
+ *
+ * The TPM command format is defined by the TCG, and is available at
+ * http://www.trustedcomputinggroup.org/resources/tpm_main_specification
+ */
+
+enum tpmif_state {
+    TPMIF_STATE_IDLE,        /* no contents / vTPM idle / cancel complete */
+    TPMIF_STATE_SUBMIT,      /* request ready / vTPM working */
+    TPMIF_STATE_FINISH,      /* response ready / vTPM idle */
+    TPMIF_STATE_CANCEL,      /* cancel requested / vTPM working */
+};
+/* Note: The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. Status changes do not need
+ * to use atomic operations.
+ */
+
+
+/* The shared page for vTPM request/response packets looks like:
+ *
+ *  Offset               Contents
+ *  =================================================
+ *  0                    struct tpmif_shared_page
+ *  16                   [optional] List of grant IDs
+ *  16+4*nr_extra_pages  TPM packet data
+ *
+ * If the TPM packet data extends beyond the end of a single page, the grant IDs
+ * defined in extra_pages are used as if they were mapped immediately following
+ * the primary shared page.
The grants are allocated by the frontend and mapped + * by the backend. Before sending a request spanning multiple pages, the + * frontend should verify that the TPM supports such large requests by querying + * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM. + */ +struct tpmif_shared_page { + uint32_t length; /* request/response length in bytes */ + + uint8_t state; /* enum tpmif_state */ + uint8_t locality; /* for the current request */ + uint8_t pad; /* should be zero */ + + uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ + uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */ +}; +typedef struct tpmif_shared_page tpmif_shared_page_t; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/usbif.h xen-4.6.5/extras/mini-os/include/xen/io/usbif.h --- xen-4.6.0/extras/mini-os/include/xen/io/usbif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/usbif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,150 @@ +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +enum usb_spec_version { + USB_VER_UNKNOWN = 0, + USB_VER_USB11, + USB_VER_USB20, + USB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in usbif_request + * + * bits 0-5 are specific bits for virtual USB driver. + * bits 7-31 are standard urb pipe. + * + * - port number(NEW): bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag(NEW): bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ +#define usbif_pipeportnum(pipe) ((pipe) & 0x1f) +#define usbif_setportnum_pipe(pipe, portnum) \ + ((pipe)|(portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & 0x20) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20)) + +#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) + +/* + * RING for transferring urbs. 
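+ *
+ * (Non-normative illustration of the pipe encoding defined above: a
+ * frontend submitting a bulk IN transfer to device address 3,
+ * endpoint 1, behind virtual port 2, could compose the pipe value as
+ *
+ *     uint32_t pipe = (3u << 30) | (1 << 15) | (3 << 8) | (1 << 7);
+ *     pipe = usbif_setportnum_pipe(pipe, 2);
+ *
+ * i.e. pipe type bulk in bits 30-31, endpoint 1 in bits 15-18, device
+ * address 3 in bits 8-14, Device-to-Host in bit 7 and port 2 in bits
+ * 0-4. The id field of the request is echoed in the matching
+ * usbif_urb_response.)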
+ */ +struct usbif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct usbif_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct usbif_urb_request usbif_urb_request_t; + +struct usbif_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; +typedef struct usbif_urb_response usbif_urb_response_t; + +DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct usbif_conn_request { + uint16_t id; +}; +typedef struct usbif_conn_request usbif_conn_request_t; + +struct usbif_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +}; +typedef struct usbif_conn_response usbif_conn_response_t; + +DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/vscsiif.h xen-4.6.5/extras/mini-os/include/xen/io/vscsiif.h --- xen-4.6.0/extras/mini-os/include/xen/io/vscsiif.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/vscsiif.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,260 @@ +/****************************************************************************** + * vscsiif.h + * + * Based on the blkif.h code. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright(c) FUJITSU Limited 2008. + */ + +#ifndef __XEN__PUBLIC_IO_SCSI_H__ +#define __XEN__PUBLIC_IO_SCSI_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * p-devname + * Values: string + * + * A free string used to identify the physical device (e.g. a disk name). + * + * p-dev + * Values: string + * + * A string specifying the backend device: either a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers), or a WWN (e.g. + * "naa.60014054ac780582"). + * + * v-dev + * Values: string + * + * A string specifying the frontend device in form of a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers). + * + *--------------------------------- Features --------------------------------- + * + * feature-sg-grant + * Values: unsigned [VSCSIIF_SG_TABLESIZE...65535] + * Default Value: 0 + * + * Specifies the maximum number of scatter/gather elements in grant pages + * supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE + * SG elements specified directly in the request. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + */ + +/* Requests from the frontend to the backend */ + +/* + * Request a SCSI operation specified via a CDB in vscsiif_request.cmnd. + * The target is specified via channel, id and lun. + * + * The operation to be performed is specified via a CDB in cmnd[], the length + * of the CDB is in cmd_len. sc_data_direction specifies the direction of data + * (to the device, from the device, or none at all). 
+ *
+ * If data is to be transferred to or from the device the buffer(s) in the
+ * guest memory is/are specified via one or multiple scsiif_request_segment
+ * descriptors each specifying a memory page via a grant_ref_t, an offset into
+ * the page and the length of the area in that page. All scsiif_request_segment
+ * areas concatenated form the resulting data buffer used by the operation.
+ * If the number of scsiif_request_segment areas is not too large (less than
+ * or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
+ * seg[] array and the number of valid scsiif_request_segment elements is to be
+ * set in nr_segments.
+ *
+ * If "feature-sg-grant" in the Xenstore is set it is possible to specify more
+ * than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
+ * The maximum number of allowed scsiif_request_segment elements is the value
+ * of the "feature-sg-grant" entry from Xenstore. When using indirection the
+ * seg[] array doesn't contain specifications of the data buffers, but
+ * references to scsiif_request_segment arrays, which in turn reference the
+ * data buffers. While nr_segments holds the number of populated seg[] entries
+ * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
+ * elements referencing the target data buffers is calculated from the lengths
+ * of the seg[] elements (the sum of all valid seg[].length divided by the
+ * size of one scsiif_request_segment structure). The frontend may use a mix of
+ * direct and indirect requests.
+ */
+#define VSCSIIF_ACT_SCSI_CDB 1
+
+/*
+ * Request abort of a running operation for the specified target given by
+ * channel, id, lun and the operation's rqid in ref_rqid.
+ */
+#define VSCSIIF_ACT_SCSI_ABORT 2
+
+/*
+ * Request a device reset of the specified target (channel and id).
+ */
+#define VSCSIIF_ACT_SCSI_RESET 3
+
+/*
+ * Preset scatter/gather elements for a following request. Deprecated.
+ * Keeping the define only to avoid usage of the value "4" for other actions.
+ */
+#define VSCSIIF_ACT_SCSI_SG_PRESET 4
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter/gather
+ * elements needed, we decided to use 26 as a magic number.
+ *
+ * If "feature-sg-grant" is set, more scatter/gather elements can be specified
+ * by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
+ * In this case the vscsiif_request seg elements don't contain references to
+ * the user data, but to the SG elements referencing the user data.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18, still valid
+ *
+ * Changing these values requires support of multiple protocols via the rings
+ * as "old clients" will blindly use these values and the resulting structure
+ * sizes.
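+ *
+ * (Worked example for the indirection scheme described further up,
+ * assuming 4096-byte pages: one scsiif_request_segment occupies 8
+ * bytes, so a single granted page holds 4096 / 8 = 512 SG elements,
+ * and the 26 seg[] slots can reference up to 26 * 512 = 13312 data
+ * segments in one request.)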
+ */ +#define VSCSIIF_MAX_COMMAND_SIZE 16 +#define VSCSIIF_SENSE_BUFFERSIZE 96 + +struct scsiif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; +typedef struct scsiif_request_segment vscsiif_segment_t; + +#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment)) + +/* Size of one request is 252 bytes */ +struct vscsiif_request { + uint16_t rqid; /* private guest value, echoed in resp */ + uint8_t act; /* command between backend and frontend */ + uint8_t cmd_len; /* valid CDB bytes */ + + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */ + uint16_t timeout_per_command; /* deprecated: timeout in secs, 0=default */ + uint16_t channel, id, lun; /* (virtual) device specification */ + uint16_t ref_rqid; /* command abort reference */ + uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) + DMA_FROM_DEVICE(2) + DMA_NONE(3) requests */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ +/* + * flag in nr_segments: SG elements via grant page + * + * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number + * of grant pages containing SG elements. Usable if "feature-sg-grant" set. + */ +#define VSCSIIF_SG_GRANT 0x80 + + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE]; + uint32_t reserved[3]; +}; +typedef struct vscsiif_request vscsiif_request_t; + +/* + * The following interface is deprecated! + */ +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \ + / sizeof(vscsiif_segment_t)) + +struct vscsiif_sg_list { + /* First two fields must match struct vscsiif_request! */ + uint16_t rqid; /* private guest value, must match main req */ + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE]; +}; +typedef struct vscsiif_sg_list vscsiif_sg_list_t; +/* End of deprecated interface */ + +/* Size of one response is 252 bytes */ +struct vscsiif_response { + uint16_t rqid; /* identifies request */ + uint8_t act; /* deprecated: valid only if SG_PRESET supported */ + uint8_t sense_len; + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; + int32_t rslt; + uint32_t residual_len; /* request bufflen - + return the value from physical device */ + uint32_t reserved[36]; +}; +typedef struct vscsiif_response vscsiif_response_t; + +DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); + + +#endif /*__XEN__PUBLIC_IO_SCSI_H__*/ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/xenbus.h xen-4.6.5/extras/mini-os/include/xen/io/xenbus.h --- xen-4.6.0/extras/mini-os/include/xen/io/xenbus.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/xenbus.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,80 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/io/xs_wire.h xen-4.6.5/extras/mini-os/include/xen/io/xs_wire.h --- xen-4.6.0/extras/mini-os/include/xen/io/xs_wire.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/io/xs_wire.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,149 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES, + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#ifdef EINVAL +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) +}; +#endif + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; + uint32_t server_features; /* Bitmap of features supported by the server */ + uint32_t connection; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. 
*/ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + +/* The ability to reconnect a ring */ +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 + +/* Valid values for the connection field */ +#define XENSTORE_CONNECTED 0 /* the steady-state */ +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */ + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/kexec.h xen-4.6.5/extras/mini-os/include/xen/kexec.h --- xen-4.6.0/extras/mini-os/include/xen/kexec.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/kexec.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,249 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Xen port written by: + * - Simon 'Horms' Horman + * - Magnus Damm + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. + * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. 
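+ *
+ * (Illustrative sketch only: from the dom0 kernel the reboot itself
+ * reduces to roughly
+ *
+ *     xen_kexec_exec_t exec = { .type = KEXEC_TYPE_DEFAULT };
+ *     rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &exec);
+ *
+ * where HYPERVISOR_kexec_op stands for the guest kernel's hypercall
+ * stub; on success the call does not return.)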
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ *  int kexec_op(int cmd, void *args)
+ * @cmd  == KEXEC_CMD_...
+ *          KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ *   - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ *   - KEXEC_TYPE_CRASH is used to specify this type
+ *   - parts of our system may be broken at kexec-on-panic time
+ *     - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH   1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of machine address lists to pages
+ * together with destination addresses. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+    unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+    unsigned long indirection_page;
+    unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ *
+ * Control is transferred to the image entry point with the host in
+ * the following state.
+ *
+ * - The image may be executed on any PCPU and all other PCPUs are
+ *   stopped.
+ *
+ * - Local interrupts are disabled.
+ *
+ * - Register values are undefined.
+ *
+ * - The image segments have writeable 1:1 virtual to machine
+ *   mappings. The location of any page tables is undefined and these
+ *   page table frames are not mapped.
+ */
+#define KEXEC_CMD_kexec 0
+typedef struct xen_kexec_exec {
+    int type;
+} xen_kexec_exec_t;
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type  == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load_v1 1 /* obsolete since 0x00040400 */
+#define KEXEC_CMD_kexec_unload_v1 2 /* obsolete since 0x00040400 */
+typedef struct xen_kexec_load_v1 {
+    int type;
+    xen_kexec_image_t image;
+} xen_kexec_load_v1_t;
+
+#define KEXEC_RANGE_MA_CRASH      0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN        1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU        2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP    3 /* machine address and size of xenheap
+                                     * Note that although this is adjacent
+                                     * to Xen it exists in a separate EFI
+                                     * region on ia64, and thus needs to be
+                                     * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of
+                                     * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
+                                     * of the EFI Memory Map */
+#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_...
[in]
+ * nr    == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size  == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range 3
+typedef struct xen_kexec_range {
+    int range;
+    int nr;
+    unsigned long size;
+    unsigned long start;
+} xen_kexec_range_t;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00040400
+/*
+ * A contiguous chunk of a kexec image and its destination machine
+ * address.
+ */
+typedef struct xen_kexec_segment {
+    union {
+        XEN_GUEST_HANDLE(const_void) h;
+        uint64_t _pad;
+    } buf;
+    uint64_t buf_size;
+    uint64_t dest_maddr;
+    uint64_t dest_size;
+} xen_kexec_segment_t;
+DEFINE_XEN_GUEST_HANDLE(xen_kexec_segment_t);
+
+/*
+ * Load a kexec image into memory.
+ *
+ * For KEXEC_TYPE_DEFAULT images, the segments may be anywhere in RAM.
+ * The image is relocated prior to being executed.
+ *
+ * For KEXEC_TYPE_CRASH images, each segment of the image must reside
+ * in the memory region reserved for kexec (KEXEC_RANGE_MA_CRASH) and
+ * the entry point must be within the image. The caller is responsible
+ * for ensuring that multiple images do not overlap.
+ *
+ * All image segments will be loaded to their destination machine
+ * addresses prior to being executed. The trailing portion of any
+ * segments with a source buffer (from dest_maddr + buf_size to
+ * dest_maddr + dest_size) will be zeroed.
+ *
+ * Segments with no source buffer will be accessible to the image when
+ * it is executed.
+ */
+
+#define KEXEC_CMD_kexec_load 4
+typedef struct xen_kexec_load {
+    uint8_t  type;        /* One of KEXEC_TYPE_* */
+    uint8_t  _pad;
+    uint16_t arch;        /* ELF machine type (EM_*). */
+    uint32_t nr_segments;
+    union {
+        XEN_GUEST_HANDLE(xen_kexec_segment_t) h;
+        uint64_t _pad;
+    } segments;
+    uint64_t entry_maddr; /* image entry point machine address. */
+} xen_kexec_load_t;
+DEFINE_XEN_GUEST_HANDLE(xen_kexec_load_t);
+
+/*
+ * Unload a kexec image.
+ *
+ * Type must be one of KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH.
+ */
+#define KEXEC_CMD_kexec_unload 5
+typedef struct xen_kexec_unload {
+    uint8_t type;
+} xen_kexec_unload_t;
+DEFINE_XEN_GUEST_HANDLE(xen_kexec_unload_t);
+
+#else /* __XEN_INTERFACE_VERSION__ < 0x00040400 */
+
+#define KEXEC_CMD_kexec_load KEXEC_CMD_kexec_load_v1
+#define KEXEC_CMD_kexec_unload KEXEC_CMD_kexec_unload_v1
+#define xen_kexec_load xen_kexec_load_v1
+#define xen_kexec_load_t xen_kexec_load_v1_t
+
+#endif
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -Nru xen-4.6.0/extras/mini-os/include/xen/mem_event.h xen-4.6.5/extras/mini-os/include/xen/mem_event.h
--- xen-4.6.0/extras/mini-os/include/xen/mem_event.h 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/include/xen/mem_event.h 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,134 @@
+/******************************************************************************
+ * mem_event.h
+ *
+ * Memory event common structures.
+ *
+ * Copyright (c) 2009 by Citrix Systems, Inc.
(Patrick Colp) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_MEM_EVENT_H +#define _XEN_PUBLIC_MEM_EVENT_H + +#include "xen.h" +#include "io/ring.h" + +/* Memory event flags */ +#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) +#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1) +#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) +#define MEM_EVENT_FLAG_FOREIGN (1 << 3) +#define MEM_EVENT_FLAG_DUMMY (1 << 4) +/* + * Emulate the fault-causing instruction (if set in the event response flags). + * This will allow the guest to continue execution without lifting the page + * access restrictions. + */ +#define MEM_EVENT_FLAG_EMULATE (1 << 5) +/* + * Same as MEM_EVENT_FLAG_EMULATE, but with write operations or operations + * potentially having side effects (like memory mapped or port I/O) disabled. + */ +#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6) + +/* Reasons for the memory event request */ +#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */ +#define MEM_EVENT_REASON_VIOLATION 1 /* access violation, GFN is address */ +#define MEM_EVENT_REASON_CR0 2 /* CR0 was hit: gfn is new CR0 value, gla is previous */ +#define MEM_EVENT_REASON_CR3 3 /* CR3 was hit: gfn is new CR3 value, gla is previous */ +#define MEM_EVENT_REASON_CR4 4 /* CR4 was hit: gfn is new CR4 value, gla is previous */ +#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ +#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ +#define MEM_EVENT_REASON_MSR 7 /* MSR was hit: gfn is MSR value, gla is MSR address; + does NOT honour HVMPME_onchangeonly */ + +/* Using a custom struct (not hvm_hw_cpu) so as to not fill + * the mem_event ring buffer too quickly. 
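+ * (For scale: struct mem_event_regs_x86 below is 31 64-bit fields plus
+ * two 32-bit ones, i.e. 256 bytes per event, where a full hvm_hw_cpu
+ * save record would be several times that.)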
*/ +struct mem_event_regs_x86 { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rflags; + uint64_t dr7; + uint64_t rip; + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t msr_efer; + uint64_t msr_star; + uint64_t msr_lstar; + uint64_t fs_base; + uint64_t gs_base; + uint32_t cs_arbytes; + uint32_t _pad; +}; + +typedef struct mem_event_st { + uint32_t flags; + uint32_t vcpu_id; + + uint64_t gfn; + uint64_t offset; + uint64_t gla; /* if gla_valid */ + + uint32_t p2mt; + + uint16_t access_r:1; + uint16_t access_w:1; + uint16_t access_x:1; + uint16_t gla_valid:1; + uint16_t fault_with_gla:1; + uint16_t fault_in_gpt:1; + uint16_t available:10; + + uint16_t reason; + struct mem_event_regs_x86 x86_regs; +} mem_event_request_t, mem_event_response_t; + +DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t); + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/memory.h xen-4.6.5/extras/mini-os/include/xen/memory.h --- xen-4.6.0/extras/mini-os/include/xen/memory.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/memory.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,587 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +#include "xen.h" + +/* + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. 
This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request  (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+#endif
+
+struct xen_memory_reservation {
+
+    /*
+     * XENMEM_increase_reservation:
+     *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+     * XENMEM_decrease_reservation:
+     *   IN:  GMFN bases of extents to free
+     * XENMEM_populate_physmap:
+     *   IN:  GPFN bases of extents to populate with memory
+     *   OUT: GMFN bases of extents that were allocated
+     *   (NB. This command also updates the mach_to_phys translation table)
+     * XENMEM_claim_pages:
+     *   IN: must be zero
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /* Number of extents, and size/alignment of each (2^extent_order pages). */
+    xen_ulong_t    nr_extents;
+    unsigned int   extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+    /* XENMEMF flags. */
+    unsigned int   mem_flags;
+#else
+    unsigned int   address_bits;
+#endif
+
+    /*
+     * Domain whose reservation is being changed.
+     * Unprivileged domains can specify only DOMID_SELF.
+     */
+    domid_t        domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange {
+    /*
+     * [IN] Details of memory extents to be exchanged (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+
+    /*
+     * [IN/OUT] Details of new memory extents.
+     * We require that:
+     *  1. @in.domid == @out.domid
+     *  2. @in.nr_extents  << @in.extent_order ==
+     *     @out.nr_extents << @out.extent_order
+     *  3. @in.extent_start and @out.extent_start lists must not overlap
+     *  4. @out.extent_start lists GPFN bases to be populated
+     *  5. @out.extent_start is overwritten with allocated GMFN bases
+     */
+    struct xen_memory_reservation out;
+
+    /*
+     * [OUT] Number of input extents that were successfully exchanged:
+     *  1. The first @nr_exchanged input extents were successfully
+     *     deallocated.
+     *  2. The corresponding first entries in the output extent list correctly
+     *     indicate the GMFNs that were successfully exchanged.
+     *  3. All other input and output extents are untouched.
+     *  4. If not all input extents are exchanged then the return code of this
+     *     command will be non-zero.
+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+     */
+    xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
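+ *
+ * (Illustrative: from a guest kernel this is simply
+ *
+ *     max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
+ *
+ * with HYPERVISOR_memory_op standing for the guest's memory_op
+ * hypercall stub.)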
+ */
+#define XENMEM_maximum_ram_page     2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation  3
+#define XENMEM_maximum_reservation  4
+
+/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn         14
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list    5
+struct xen_machphys_mfn_list {
+    /*
+     * Size of the 'extent_start' array. Fewer entries will be filled if the
+     * machphys table is smaller than max_extents * 2MB.
+     */
+    unsigned int max_extents;
+
+    /*
+     * Pointer to buffer to fill with list of extent starts. If there are
+     * any large discontiguities in the machine address space, 2MB gaps in
+     * the machphys table will be represented by an MFN base of zero.
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /*
+     * Number of extents written to the above array. This will be smaller
+     * than 'max_extents' if the machphys table is smaller than
+     * max_extents * 2MB.
+     */
+    unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * For a compat caller, this is identical to XENMEM_machphys_mfn_list.
+ *
+ * For a non compat caller, this functions similarly to
+ * XENMEM_machphys_mfn_list, but returns the mfns making up the compatibility
+ * m2p table.
+ */
+#define XENMEM_machphys_compat_mfn_list     25
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/* Source mapping space. */
+/* ` enum phys_map_space { */
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+                                    * XENMEM_add_to_physmap_batch only. */
+/* ` } */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap      7
+struct xen_add_to_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* Number of pages to go through for gmfn_range */
+    uint16_t    size;
+
+    unsigned int space; /* => enum phys_map_space */
+
+#define XENMAPIDX_grant_table_status 0x80000000
+
+    /* Index into space being mapped. */
+    xen_ulong_t idx;
+
+    /* GPFN in domid where the source mapping page should appear. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/* A batched version of add_to_physmap.
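+ * Each idxs[i]/gpfns[i] pair is handled like an independent
+ * XENMEM_add_to_physmap call, with the per-entry result reported in
+ * errs[i].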
*/ +#define XENMEM_add_to_physmap_batch 23 +struct xen_add_to_physmap_batch { + /* IN */ + /* Which domain to change the mapping for. */ + domid_t domid; + uint16_t space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + uint16_t size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + XEN_GUEST_HANDLE(xen_ulong_t) idxs; + + /* GPFN in domid where the source mapping page should appear. */ + XEN_GUEST_HANDLE(xen_pfn_t) gpfns; + + /* OUT */ + + /* Per index error code. */ + XEN_GUEST_HANDLE(int) errs; +}; +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_batch_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_batch_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define XENMEM_add_to_physmap_range XENMEM_add_to_physmap_batch +#define xen_add_to_physmap_range xen_add_to_physmap_batch +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_range_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t); +#endif + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. 
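+ * + * Sketch (illustrative; assuming the argument is ignored for these two + * subops, NULL can be passed for arg): + * freed = HYPERVISOR_memory_op(XENMEM_get_sharing_freed_pages, NULL);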
+ */ +#define XENMEM_get_sharing_freed_pages 18 +#define XENMEM_get_sharing_shared_pages 19 + +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_aligned_t buffer; + /* Other OPs */ + uint64_aligned_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 +#define XENMEM_access_op_set_access 1 +#define XENMEM_access_op_get_access 2 + +typedef enum { + XENMEM_access_n, + XENMEM_access_r, + XENMEM_access_w, + XENMEM_access_rw, + XENMEM_access_x, + XENMEM_access_rx, + XENMEM_access_wx, + XENMEM_access_rwx, + /* + * Page starts off as r-x, but automatically + * change to r-w on a write + */ + XENMEM_access_rx2rw, + /* + * Log access: starts off as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu + */ + XENMEM_access_n2rwx, + /* Take the domain default */ + XENMEM_access_default +} xenmem_access_t; + +struct xen_mem_access_op { + /* XENMEM_access_op_* */ + uint8_t op; + /* xenmem_access_t */ + uint8_t access; + domid_t domid; + /* + * Number of pages for set op + * Ignored on setting default access and other ops + */ + uint32_t nr; + /* + * First pfn for set op + * pfn for get op + * ~0ull is used to set and get the default access for pages + */ + uint64_aligned_t pfn; +}; +typedef struct xen_mem_access_op xen_mem_access_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 +#define XENMEM_sharing_op_audit 8 + +#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +/* The following allows sharing of grant refs. This is useful + * for sharing utilities sitting as "filters" in IO backends + * (e.g. memshr + blktap(2)). 
The IO backend is only exposed + * to grant references, and this allows sharing of the grefs */ +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) + +#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) +#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ + ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) +#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ + ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) + +struct xen_mem_sharing_op { + uint8_t op; /* XENMEM_sharing_op_* */ + domid_t domain; + + union { + struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to nominate */ + uint32_t grant_ref; /* IN: grant ref to nominate */ + } u; + uint64_aligned_t handle; /* OUT: the handle */ + } nominate; + struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ + uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ + uint64_aligned_t source_handle; /* IN: handle to the source page */ + uint64_aligned_t client_gfn; /* IN: the client gfn */ + uint64_aligned_t client_handle; /* IN: handle to the client page */ + domid_t client_domain; /* IN: the client domain id */ + } share; + struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to debug */ + uint64_aligned_t mfn; /* IN: mfn to debug */ + uint32_t gref; /* IN: gref to debug */ + } u; + } debug; + } u; +}; +typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); + +/* + * Attempt to stake a claim for a domain on a quantity of pages + * of system RAM, but _not_ assign specific pageframes. Only + * arithmetic is performed so the hypercall is very fast and need + * not be preemptible, thus sidestepping time-of-check-time-of-use + * races for memory allocation. Returns 0 if the hypervisor page + * allocator has atomically and successfully claimed the requested + * number of pages, else non-zero. + * + * Any domain may have only one active claim. When sufficient memory + * has been allocated to resolve the claim, the claim silently expires. + * Claiming zero pages effectively resets any outstanding claim and + * is always successful. + * + * Note that a valid claim may be staked even after memory has been + * allocated for a domain. In this case, the claim is not incremental, + * i.e. if the domain's tot_pages is 3, and a claim is staked for 10, + * only 7 additional pages are claimed. + * + * Caller must be privileged or the hypercall fails. + */ +#define XENMEM_claim_pages 24 + +/* + * XENMEM_claim_pages flags - there are no flags at this time. + * The zero value is appropriate. + */ + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +/* + * XENMEM_get_vnumainfo is used by a guest to get + * vNUMA topology from the hypervisor. + */ +#define XENMEM_get_vnumainfo 26 + +/* vNUMA node memory ranges */ +struct xen_vmemrange { + uint64_t start, end; + unsigned int flags; + unsigned int nid; +}; +typedef struct xen_vmemrange xen_vmemrange_t; +DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t); + +/* + * vNUMA topology specifies the vNUMA node count, distance table, + * memory ranges and vcpu mapping provided for guests. + * The XENMEM_get_vnumainfo hypercall expects the guest to pass in + * nr_vnodes, nr_vmemranges and nr_vcpus to indicate the available + * buffer space. After the guest's structures are filled, nr_vnodes, + * nr_vmemranges and nr_vcpus are copied back to the guest. If the + * values were incorrect, the hypervisor returns the expected values + * of nr_vnodes, nr_vmemranges and nr_vcpus to the guest.
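+ * + * Usage sketch (illustrative): set nr_vnodes, nr_vmemranges and nr_vcpus + * to the capacities of the buffers behind vdistance, vmemrange and + * vcpu_to_vnode, issue the hypercall, and if it fails because the sizes + * were too small, reallocate using the values written back and retry.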
+ */ +struct xen_vnuma_topology_info { + /* IN */ + domid_t domid; + uint16_t pad; + /* IN/OUT */ + unsigned int nr_vnodes; + unsigned int nr_vcpus; + unsigned int nr_vmemranges; + /* OUT */ + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vdistance; + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vcpu_to_vnode; + union { + XEN_GUEST_HANDLE(xen_vmemrange_t) h; + uint64_t pad; + } vmemrange; +}; +typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); + +/* Next available subop number is 27 */ + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/nmi.h xen-4.6.5/extras/mini-os/include/xen/nmi.h --- xen-4.6.0/extras/mini-os/include/xen/nmi.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/nmi.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,85 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +#include "xen.h" + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) +#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */ + /* Parity error reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_parity_error 1 +#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) +#endif + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. 
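+ * + * Registration sketch (illustrative; assumes a HYPERVISOR_nmi_op-style + * wrapper and a hypothetical nmi_entry handler symbol): + * struct xennmi_callback cb = { .handler_address = (unsigned long)nmi_entry }; + * rc = HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);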
+ */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +typedef struct xennmi_callback xennmi_callback_t; +DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/physdev.h xen-4.6.5/extras/mini-os/include/xen/physdev.h --- xen-4.6.0/extras/mini-os/include/xen/physdev.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/physdev.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,380 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. 
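+ * + * Query sketch (illustrative; uses the physdev_op prototype described + * above and the XENIRQSTAT_needs_eoi flag defined below): + * struct physdev_irq_status_query q = { .irq = irq }; + * rc = physdev_op(PHYSDEVOP_irq_status_query, &q); + * if ( rc == 0 && (q.flags & XENIRQSTAT_needs_eoi) ) + * ... issue PHYSDEVOP_eoi once the IRQ has been serviced ...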
+ */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. 
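+ * + * Allocation sketch (illustrative): + * struct physdev_irq op = { .irq = irq }; + * rc = physdev_op(PHYSDEVOP_alloc_irq_vector, &op); + * ... on success, op.vector holds the allocated vector ...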
+ */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 +#define MAP_PIRQ_TYPE_MULTI_MSI 0x4 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN (ignored for ..._MULTI_MSI) */ + int index; + /* IN or OUT */ + int pirq; + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */ + int bus; + /* IN */ + int devfn; + /* IN (also OUT for ..._MULTI_MSI) */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + +/* + * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op() + * hypercall since 0x00030202.
+ */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce that the MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + +#define PHYSDEVOP_DBGP_RESET_PREPARE 1 +#define PHYSDEVOP_DBGP_RESET_DONE 2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 +#define PHYSDEVOP_DBGP_BUS_PCI 1 + +#define PHYSDEVOP_dbgp_op 29 +struct physdev_dbgp_op { + /* IN */ + uint8_t op; + uint8_t bus; + union { + struct physdev_pci_device pci; + } u; +}; +typedef struct physdev_dbgp_op physdev_dbgp_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +#if __XEN_INTERFACE_VERSION__ < 0x00040600 +/* + * These all-capitals physdev operation names are superseded by the new names + * (defined above) since interface version 0x00030202. The guard above was + * only added post-4.5, though, and hence shouldn't check for 0x00030202.
+ */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#endif + +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/platform.h xen-4.6.5/extras/mini-os/include/xen/platform.h --- xen-4.6.0/extras/mini-os/include/xen/platform.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/platform.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,606 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include "xen.h" + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system time was <system_time>. + */ +#define XENPF_settime 17 +struct xenpf_settime { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +typedef struct xenpf_settime xenpf_settime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables.
*/ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_add_memtype xenpf_add_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_del_memtype xenpf_del_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +typedef struct xenpf_read_memtype xenpf_read_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +typedef struct xenpf_microcode_update xenpf_microcode_update_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); + +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. 
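+ * + * Call sketch for XENPF_efi_runtime_call (illustrative; uses the + * HYPERVISOR_platform_op prototype described near the end of this header): + * struct xen_platform_op op = { + * .cmd = XENPF_efi_runtime_call, + * .interface_version = XENPF_INTERFACE_VERSION, + * .u.efi_runtime_call.function = XEN_EFI_get_time, + * }; + * rc = HYPERVISOR_platform_op(&op);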
+ */ + uint32_t misc; + xen_ulong_t status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + xen_ulong_t size; + XEN_GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + xen_ulong_t size; + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + +#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001 + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t max_capsule_size; + uint32_t reset_type; + } query_capsule_capabilities; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 +#define XEN_FW_EFI_PCI_ROM 5 +#define XEN_FW_KBD_SHIFT_FLAGS 5 +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. 
*/ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + struct { + /* IN variables */ + uint16_t segment; + uint8_t bus; + uint8_t devfn; + uint16_t vendor; + uint16_t devid; + /* OUT variables */ + uint64_t address; + xen_ulong_t size; + } pci_rom; + } efi_info; /* XEN_FW_EFI_INFO */ + + /* Int16, Fn02: Get keyboard shift flags. */ + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + uint16_t pm1a_cnt_val; /* PM1a control value. */ + uint16_t pm1b_cnt_val; /* PM1b control value. */ +#else + uint16_t val_a; /* PM1a control / sleep type A. */ + uint16_t val_b; /* PM1b control / sleep type B. */ +#endif + uint32_t sleep_state; /* Which state to enter (Sn). */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 + uint32_t flags; /* XENPF_ACPI_SLEEP_*. */ +}; +typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq 52 +struct xenpf_change_freq { + /* IN variables */ + uint32_t flags; /* Must be zero. */ + uint32_t cpu; /* Physical cpu. */ + uint64_t freq; /* New frequency (Hz). */ +}; +typedef struct xenpf_change_freq xenpf_change_freq_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. + */ +#define XENPF_getidletime 53 +struct xenpf_getidletime { + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + XEN_GUEST_HANDLE(uint8) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ + XEN_GUEST_HANDLE(uint64) idletime; + /* OUT variables */ + /* System time when the idletime snapshots were taken. 
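+ * + * Buffer-setup sketch for XENPF_getidletime (illustrative; nr is an + * assumed cpu count): + * uint8_t map[(nr + 7) / 8]; set bits select cpus to interrogate + * uint64_t idle[nr]; + * set_xen_guest_handle(op.u.getidletime.cpumap_bitmap, map); + * set_xen_guest_handle(op.u.getidletime.idletime, idle); + * op.u.getidletime.cpumap_nr_cpus = nr;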
*/ + uint64_t now; +}; +typedef struct xenpf_getidletime xenpf_getidletime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo 54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX 1 +#define XEN_PROCESSOR_PM_PX 2 +#define XEN_PROCESSOR_PM_TX 4 + +/* cmd type */ +#define XEN_PM_CX 0 +#define XEN_PM_PX 1 +#define XEN_PM_TX 2 +#define XEN_PM_PDC 3 + +/* Px sub info type */ +#define XEN_PX_PCT 1 +#define XEN_PX_PSS 2 +#define XEN_PX_PPC 4 +#define XEN_PX_PSD 8 + +struct xen_power_register { + uint32_t space_id; + uint32_t bit_width; + uint32_t bit_offset; + uint32_t access_size; + uint64_t address; +}; + +struct xen_processor_csd { + uint32_t domain; /* domain number of one dependent group */ + uint32_t coord_type; /* coordination type */ + uint32_t num; /* number of processors in same domain */ +}; +typedef struct xen_processor_csd xen_processor_csd_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); + +struct xen_processor_cx { + struct xen_power_register reg; /* GAS for Cx trigger register */ + uint8_t type; /* cstate value, c0: 0, c1: 1, ... */ + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ + uint32_t power; /* average power consumption(mW) */ + uint32_t dpcnt; /* number of dependency entries */ + XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ +}; +typedef struct xen_processor_cx xen_processor_cx_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); + +struct xen_processor_flags { + uint32_t bm_control:1; + uint32_t bm_check:1; + uint32_t has_cst:1; + uint32_t power_setup_done:1; + uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { + uint32_t count; /* number of C state entries in array below */ + struct xen_processor_flags flags; /* global flags of this processor */ + XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ +}; + +struct xen_pct_register { + uint8_t descriptor; + uint16_t length; + uint8_t space_id; + uint8_t bit_width; + uint8_t bit_offset; + uint8_t reserved; + uint64_t address; +}; + +struct xen_processor_px { + uint64_t core_frequency; /* megahertz */ + uint64_t power; /* milliWatts */ + uint64_t transition_latency; /* microseconds */ + uint64_t bus_master_latency; /* microseconds */ + uint64_t control; /* control value */ + uint64_t status; /* success indicator */ +}; +typedef struct xen_processor_px xen_processor_px_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); + +struct xen_psd_package { + uint64_t num_entries; + uint64_t revision; + uint64_t domain; + uint64_t coord_type; + uint64_t num_processors; +}; + +struct xen_processor_performance { + uint32_t flags; /* flag for Px sub info type */ + uint32_t platform_limit; /* Platform limitation on freq usage */ + struct xen_pct_register control_register; + struct xen_pct_register status_register; + uint32_t state_count; /* total available performance states */ + XEN_GUEST_HANDLE(xen_processor_px_t) states; + struct xen_psd_package domain_info; + uint32_t shared_type; /* coordination type of this processor */ +}; +typedef struct xen_processor_performance xen_processor_performance_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); + +struct xenpf_set_processor_pminfo { + /* IN variables */ + uint32_t id; /* ACPI CPU ID */ + uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ + } u; +}; +typedef struct xenpf_set_processor_pminfo 
xenpf_set_processor_pminfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); + +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Corresponding xen_cpuid is not present */ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + +#define XENPF_get_cpu_version 48 +struct xenpf_pcpu_version { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; + char vendor_id[12]; + uint32_t family; + uint32_t model; + uint32_t stepping; +}; +typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol +{ + uint32_t cpuid; +}; +typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd +{ + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: set - cpu nums expected to be idled */ + /* OUT variables: get - cpu nums actually idled */ + uint32_t idle_nums; +}; +typedef struct xenpf_core_parking xenpf_core_parking_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); + +/* + * Access generic platform resources (e.g. MSRs, port I/O, etc.) in a + * unified way. Batched resource operations in one call are supported; + * they are always non-preemptible and executed in their original order. + * The batch itself returns a negative integer for general errors, or a + * non-negative integer for the number of successful operations. For the latter + * case, the @ret in the failed entry (if any) indicates the exact error.
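+ * + * Single-entry sketch (illustrative; msr is an assumed MSR index): + * struct xenpf_resource_entry entry = { + * .u.cmd = XEN_RESOURCE_OP_MSR_READ, + * .idx = msr, + * }; + * ... set nr_entries = 1, cpu = 0, point entries at entry, and issue + * XENPF_resource_op via the platform-op hypercall; on success + * entry.val holds the MSR value ...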
+ */ +#define XENPF_resource_op 61 + +#define XEN_RESOURCE_OP_MSR_READ 0 +#define XEN_RESOURCE_OP_MSR_WRITE 1 + +struct xenpf_resource_entry { + union { + uint32_t cmd; /* IN: XEN_RESOURCE_OP_* */ + int32_t ret; /* OUT: return value for failed entry */ + } u; + uint32_t rsvd; /* IN: padding and must be zero */ + uint64_t idx; /* IN: resource address to access */ + uint64_t val; /* IN/OUT: resource value to set/get */ +}; +typedef struct xenpf_resource_entry xenpf_resource_entry_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_entry_t); + +struct xenpf_resource_op { + uint32_t nr_entries; /* number of resource entry */ + uint32_t cpu; /* which cpu to run */ + XEN_GUEST_HANDLE(xenpf_resource_entry_t) entries; +}; +typedef struct xenpf_resource_op xenpf_resource_op_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_op_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_platform_op(const struct xen_platform_op*); + */ +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime settime; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; + struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; + struct xenpf_change_freq change_freq; + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; + struct xenpf_resource_op resource_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_platform_op xen_platform_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/sched.h xen-4.6.5/extras/mini-os/include/xen/sched.h --- xen-4.6.0/extras/mini-os/include/xen/sched.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/sched.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,175 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * `incontents 150 sched Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* ` enum sched_op { // SCHEDOP_* => struct sched_* */ +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown_t structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN + * of the guest's start info page. RDX/EDX is the third hypercall + * argument. + * + * In addition, when the reason is SHUTDOWN_suspend, this hypercall + * returns 1 if suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. + */ +#define SCHEDOP_shutdown 2 + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll_t structure. + */ +#define SCHEDOP_poll 3 + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown_t structure. + */ +#define SCHEDOP_remote_shutdown 4 + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == sched_shutdown_t, as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog_t structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +/* ` } */ + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +/* ` enum sched_shutdown_reason { */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */ +/* ` } */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/sysctl.h xen-4.6.5/extras/mini-os/include/xen/sysctl.h --- xen-4.6.0/extras/mini-os/include/xen/sysctl.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/sysctl.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,719 @@ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B + +/* + * Read console content from Xen buffer ring. + */ +/* XEN_SYSCTL_readconsole */ +struct xen_sysctl_readconsole { + /* IN: Non-zero -> clear after reading. */ + uint8_t clear; + /* IN: Non-zero -> start index specified by @index field. */ + uint8_t incremental; + uint8_t pad0, pad1; + /* + * IN: Start index for consuming from ring buffer (if @incremental); + * OUT: End index after consuming from ring buffer. + */ + uint32_t index; + /* IN: Virtual address to write console data. */ + XEN_GUEST_HANDLE_64(char) buffer; + /* IN: Size of buffer; OUT: Bytes written to buffer. */ + uint32_t count; +}; +typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); + +/* Get trace buffers machine base address */ +/* XEN_SYSCTL_tbuf_op */ +struct xen_sysctl_tbuf_op { + /* IN variables */ +#define XEN_SYSCTL_TBUFOP_get_info 0 +#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 +#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 +#define XEN_SYSCTL_TBUFOP_set_size 3 +#define XEN_SYSCTL_TBUFOP_enable 4 +#define XEN_SYSCTL_TBUFOP_disable 5 + uint32_t cmd; + /* IN/OUT variables */ + struct xenctl_bitmap cpu_mask; + uint32_t evt_mask; + /* OUT variables */ + uint64_aligned_t buffer_mfn; + uint32_t size; /* Also an IN variable! */ +}; +typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); + +/* + * Get physical information about the host machine + */ +/* XEN_SYSCTL_physinfo */ + /* (x86) The platform supports HVM guests. */ +#define _XEN_SYSCTL_PHYSCAP_hvm 0 +#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) + /* (x86) The platform supports HVM-guest direct access to I/O devices. */ +#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 +#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ + uint32_t cpu_khz; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint64_aligned_t outstanding_pages; + uint32_t hw_cap[8]; + + /* XEN_SYSCTL_PHYSCAP_??? */ + uint32_t capabilities; +}; +typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); + +/* + * Get the ID of the current scheduler. + */ +/* XEN_SYSCTL_sched_id */ +struct xen_sysctl_sched_id { + /* OUT variable */ + uint32_t sched_id; +}; +typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); + +/* Interface for controlling Xen software performance counters. */ +/* XEN_SYSCTL_perfc_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ +#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. 
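+ * + * Typical two-step usage (sketch): first issue XEN_SYSCTL_PERFCOP_query + * with both the desc and val handles NULL to learn nr_counters and + * nr_vals, then allocate buffers of those sizes and query again.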
*/ +struct xen_sysctl_perfc_desc { + char name[80]; /* name of perf counter */ + uint32_t nr_vals; /* number of values for this counter */ +}; +typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); +typedef uint32_t xen_sysctl_perfc_val_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); + +struct xen_sysctl_perfc_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ + /* OUT variables. */ + uint32_t nr_counters; /* number of counters description */ + uint32_t nr_vals; /* number of values */ + /* counter information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + /* counter values (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; +}; +typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); + +/* XEN_SYSCTL_getdomaininfolist */ +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; +typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); + +/* Inject debug keys into Xen. */ +/* XEN_SYSCTL_debug_keys */ +struct xen_sysctl_debug_keys { + /* IN variables. */ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +/* XEN_SYSCTL_getcpuinfo */ +struct xen_sysctl_cpuinfo { + uint64_aligned_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + +/* XEN_SYSCTL_availheap */ +struct xen_sysctl_availheap { + /* IN variables. */ + uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ + uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ + int32_t node; /* NUMA node of interest (-1 for all nodes). */ + /* OUT variables. */ + uint64_aligned_t avail_bytes;/* Bytes available in the specified region. 
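+ * + * Query sketch (illustrative): min_bitwidth = 0, max_bitwidth = 32, + * node = -1 asks how much heap is available below 4GiB across all + * nodes.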
*/ +}; +typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); + +/* XEN_SYSCTL_get_pmstat */ +struct pm_px_val { + uint64_aligned_t freq; /* Px core frequency */ + uint64_aligned_t residency; /* Px residency time */ + uint64_aligned_t count; /* Px transition count */ +}; +typedef struct pm_px_val pm_px_val_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); + +struct pm_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ + XEN_GUEST_HANDLE_64(pm_px_val_t) pt; +}; +typedef struct pm_px_stat pm_px_stat_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); + +struct pm_cx_stat { + uint32_t nr; /* entry nr in triggers & residencies, including C0 */ + uint32_t last; /* last Cx state */ + uint64_aligned_t idle_time; /* idle time from boot */ + XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ + XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ + uint32_t nr_pc; /* entry nr in pc[] */ + uint32_t nr_cc; /* entry nr in cc[] */ + /* + * These two arrays may (and generally will) have unused slots; slots not + * having a corresponding hardware register will not be written by the + * hypervisor. It is therefore up to the caller to put a suitable sentinel + * into all slots before invoking the function. + * Indexing is 1-biased (PC1/CC1 being at index 0). + */ + XEN_GUEST_HANDLE_64(uint64) pc; + XEN_GUEST_HANDLE_64(uint64) cc; +}; + +struct xen_sysctl_get_pmstat { +#define PMSTAT_CATEGORY_MASK 0xf0 +#define PMSTAT_PX 0x10 +#define PMSTAT_CX 0x20 +#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) +#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) +#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) +#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) +#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) +#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) + uint32_t type; + uint32_t cpuid; + union { + struct pm_px_stat getpx; + struct pm_cx_stat getcx; + /* other struct for tx, etc */ + } u; +}; +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); + +/* XEN_SYSCTL_cpu_hotplug */ +struct xen_sysctl_cpu_hotplug { + /* IN variables */ + uint32_t cpu; /* Physical cpu. */ +#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 +#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 + uint32_t op; /* hotplug opcode */ +}; +typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); + +/* + * Get/set Xen power management parameters, including: + * 1. cpufreq governors and related parameters + */ +/* XEN_SYSCTL_pm_op */ +struct xen_userspace { + uint32_t scaling_setspeed; +}; +typedef struct xen_userspace xen_userspace_t; + +struct xen_ondemand { + uint32_t sampling_rate_max; + uint32_t sampling_rate_min; + + uint32_t sampling_rate; + uint32_t up_threshold; +}; +typedef struct xen_ondemand xen_ondemand_t; + +/* + * The cpufreq parameter names in this structure match the + * corresponding sysfs file names in native Linux. + */ +#define CPUFREQ_NAME_LEN 16 +struct xen_get_cpufreq_para { + /* IN/OUT variable */ + uint32_t cpu_num; + uint32_t freq_num; + uint32_t gov_num; + + /* for all governors */ + /* OUT variable */ + XEN_GUEST_HANDLE_64(uint32) affected_cpus; + XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; + XEN_GUEST_HANDLE_64(char) scaling_available_governors; + char scaling_driver[CPUFREQ_NAME_LEN]; + + uint32_t cpuinfo_cur_freq; + uint32_t cpuinfo_max_freq; + uint32_t cpuinfo_min_freq; + uint32_t scaling_cur_freq; + + char scaling_governor[CPUFREQ_NAME_LEN]; + uint32_t scaling_max_freq; + uint32_t scaling_min_freq; + + /* for specific governor */ + union { + struct xen_userspace userspace; + struct xen_ondemand ondemand; + } u; + + int32_t turbo_enabled; +}; + +struct xen_set_cpufreq_gov { + char scaling_governor[CPUFREQ_NAME_LEN]; +}; + +struct xen_set_cpufreq_para { + #define SCALING_MAX_FREQ 1 + #define SCALING_MIN_FREQ 2 + #define SCALING_SETSPEED 3 + #define SAMPLING_RATE 4 + #define UP_THRESHOLD 5 + + uint32_t ctrl_type; + uint32_t ctrl_value; +}; + +struct xen_sysctl_pm_op { + #define PM_PARA_CATEGORY_MASK 0xf0 + #define CPUFREQ_PARA 0x10 + + /* cpufreq command type */ + #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) + #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) + #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) + #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) + + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + + /* cpuidle max_cstate access command */ + #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + + /* enable/disable turbo mode when in dbs governor */ + #define XEN_SYSCTL_pm_op_enable_turbo 0x26 + #define XEN_SYSCTL_pm_op_disable_turbo 0x27 + + uint32_t cmd; + uint32_t cpuid; + union { + struct xen_get_cpufreq_para get_para; + struct xen_set_cpufreq_gov set_gov; + struct xen_set_cpufreq_para set_para; + uint64_aligned_t get_avgfreq; + uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; + } u; +}; + +/* XEN_SYSCTL_page_offline_op */ +struct xen_sysctl_page_offline_op { + /* IN: range of page to be offlined */ +#define sysctl_page_offline 1 +#define sysctl_page_online 2 +#define sysctl_query_page_offline 3 + uint32_t cmd; + uint32_t start; + uint32_t end; + /* OUT: result of page offline request */ + /* + * bit 0~15: result flags + * bit 16~31: owner + */ + XEN_GUEST_HANDLE(uint32) status; +}; + +#define PG_OFFLINE_STATUS_MASK (0xFFUL) + +/* The result is invalid, i.e.
HV does not handle it */ +#define PG_OFFLINE_INVALID (0x1UL << 0) + +#define PG_OFFLINE_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_PENDING (0x1UL << 2) +#define PG_OFFLINE_FAILED (0x1UL << 3) +#define PG_OFFLINE_AGAIN (0x1UL << 4) + +#define PG_ONLINE_FAILED PG_OFFLINE_FAILED +#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED + +#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) +#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) +#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) + +#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) + +/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ +#define PG_OFFLINE_XENPAGE (0x1UL << 8) +#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) +#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) +#define PG_OFFLINE_OWNED (0x1UL << 12) + +#define PG_OFFLINE_BROKEN (0x1UL << 13) +#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN + +#define PG_OFFLINE_OWNER_SHIFT 16 + +/* XEN_SYSCTL_lockprof_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ +#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ +/* Record-type: */ +#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ +#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ +#define LOCKPROF_TYPE_N 2 /* number of types */ +struct xen_sysctl_lockprof_data { + char name[40]; /* lock name (may include up to 2 %d specifiers) */ + int32_t type; /* LOCKPROF_TYPE_??? */ + int32_t idx; /* index (e.g. domain id) */ + uint64_aligned_t lock_cnt; /* # of successful lock acquisitions */ + uint64_aligned_t block_cnt; /* # of times waited for the lock */ + uint64_aligned_t lock_time; /* nsecs lock held */ + uint64_aligned_t block_time; /* nsecs waited for lock */ +}; +typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); +struct xen_sysctl_lockprof_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ + uint32_t max_elem; /* size of output buffer */ + /* OUT variables (query only). */ + uint32_t nr_elem; /* number of elements available */ + uint64_aligned_t time; /* nsecs of profile measurement */ + /* profile information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; +}; +typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); + +/* XEN_SYSCTL_topologyinfo */ +#define INVALID_TOPOLOGY_ID (~0U) +struct xen_sysctl_topologyinfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + * If OUT is less than IN then the array tails are not written by sysctl. + */ + uint32_t max_cpu_index; + + /* + * If not NULL, these arrays are filled with core/socket/node identifier + * for each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written to each array.
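The PG_OFFLINE_* status word defined a few hunks above packs result flags into bits 0-15 and the owning domain into bits 16-31. A self-contained sketch of decoding one entry of the status array (constants copied from this header; the code that issues the query is omitted):

#include <stdint.h>
#include <stdio.h>

#define PG_OFFLINE_OFFLINED    (0x1UL << 1)
#define PG_OFFLINE_PENDING     (0x1UL << 2)
#define PG_OFFLINE_FAILED      (0x1UL << 3)
#define PG_OFFLINE_XENPAGE     (0x1UL << 8)
#define PG_OFFLINE_OWNER_SHIFT 16

static void decode_page_status(uint32_t status)
{
    if (status & PG_OFFLINE_OFFLINED)
        printf("offlined\n");
    else if (status & PG_OFFLINE_PENDING)
        printf("pending, owned by domid %u\n",
               status >> PG_OFFLINE_OWNER_SHIFT); /* bits 16-31: owner */
    else if (status & PG_OFFLINE_FAILED)
        printf("failed%s\n",
               (status & PG_OFFLINE_XENPAGE) ? " (Xen-owned page)" : "");
}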
+ * The number of array elements written by the sysctl is: + * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +/* XEN_SYSCTL_numainfo */ +#define INVALID_NUMAINFO_ID (~0U) +struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest node identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + */ + uint32_t max_node_index; + + /* NB. Entries are 0 if node is not present. */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + /* + * Array, of size (max_node_index+1)^2, listing memory access distances + * between nodes. If an entry has no node distance information (e.g., node + * not present) then the value ~0u is written. + * + * Note that the array rows must be indexed by multiplying by the minimum + * of the caller-provided max_node_index and the returned value of + * max_node_index. That is, if the largest node index in the system is + * smaller than the caller can handle, a smaller 2-d array is constructed + * within the space provided by the caller. When this occurs, trailing + * space provided by the caller is not modified. If the largest node index + * in the system is larger than the caller can handle, then a 2-d array of + * the maximum size handleable by the caller is constructed. + */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + +/* XEN_SYSCTL_cpupool_op */ +#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ +#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ +#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ +#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ +#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ +#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ +#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ +#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF +struct xen_sysctl_cpupool_op { + uint32_t op; /* IN */ + uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ + uint32_t sched_id; /* IN: C OUT: I */ + uint32_t domid; /* IN: M */ + uint32_t cpu; /* IN: AR */ + uint32_t n_dom; /* OUT: I */ + struct xenctl_bitmap cpumap; /* OUT: IF */ +}; +typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); + +#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 +/* + * This structure is used to pass a new ARINC653 schedule from a + * privileged domain (ie dom0) to Xen. + */ +struct xen_sysctl_arinc653_schedule { + /* major_frame holds the time for the new schedule's major frame + * in nanoseconds. */ + uint64_aligned_t major_frame; + /* num_sched_entries holds how many of the entries in the + * sched_entries[] array are valid. */ + uint8_t num_sched_entries; + /* The sched_entries array holds the actual schedule entries. */ + struct { + /* dom_handle must match a domain's UUID */ + xen_domain_handle_t dom_handle; + /* If a domain has multiple VCPUs, vcpu_id specifies which one + * this schedule entry applies to. It should be set to 0 if + * there is only one VCPU for the domain. */ + unsigned int vcpu_id; + /* runtime specifies the amount of time that should be allocated + * to this VCPU per major frame. 
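The distance-matrix indexing rule described above is easy to get wrong: since the matrix is (max_node_index+1)^2 entries, the row stride is min(IN, OUT)+1, where IN is the max_node_index the caller supplied and OUT is the value Xen returned. A self-contained sketch:

#include <stdint.h>

/* dist points at the caller's node_to_node_distance buffer after the sysctl. */
static uint32_t node_distance(const uint32_t *dist,
                              uint32_t max_node_in,  /* IN value we supplied */
                              uint32_t max_node_out, /* OUT value Xen returned */
                              uint32_t from, uint32_t to)
{
    uint32_t stride = (max_node_in < max_node_out ? max_node_in
                                                  : max_node_out) + 1;
    return dist[from * stride + to]; /* ~0u when no info for this pair */
}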
It is specified in nanoseconds */ + uint64_aligned_t runtime; + } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; +}; +typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); + +struct xen_sysctl_credit_schedule { + /* Length of timeslice in milliseconds */ +#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 +#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 + unsigned tslice_ms; + /* Rate limit (minimum timeslice) in microseconds */ +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 + unsigned ratelimit_us; +}; +typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); + +/* XEN_SYSCTL_scheduler_op */ +/* Set or get info? */ +#define XEN_SYSCTL_SCHEDOP_putinfo 0 +#define XEN_SYSCTL_SCHEDOP_getinfo 1 +struct xen_sysctl_scheduler_op { + uint32_t cpupool_id; /* Cpupool whose scheduler is to be targeted. */ + uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ + uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ + union { + struct xen_sysctl_sched_arinc653 { + XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; + } sched_arinc653; + struct xen_sysctl_credit_schedule sched_credit; + } u; +}; +typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); + +/* XEN_SYSCTL_coverage_op */ +/* + * Get total size of information, to help allocate + * the buffer. The pointer points to a 32 bit value. + */ +#define XEN_SYSCTL_COVERAGE_get_total_size 0 + +/* + * Read coverage information in a single run. + * You must use a tool to split them. + */ +#define XEN_SYSCTL_COVERAGE_read 1 + +/* + * Reset all the coverage counters to 0 + * No parameters. + */ +#define XEN_SYSCTL_COVERAGE_reset 2 + +/* + * Like XEN_SYSCTL_COVERAGE_read but also resets + * the counters to 0 in a single call.
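A hypothetical sketch of programming the credit-scheduler knobs defined a few hunks above for one cpupool. It assumes the public headers (with __XEN_TOOLS__ defined) and the same caller-provided do_sysctl() wrapper as in the earlier sketch; XEN_SCHEDULER_CREDIT comes from domctl.h:

#include <string.h>

extern int do_sysctl(struct xen_sysctl *sysctl); /* assumed wrapper */

int set_credit_params(uint32_t poolid)
{
    struct xen_sysctl sysctl;

    memset(&sysctl, 0, sizeof(sysctl));
    sysctl.cmd = XEN_SYSCTL_scheduler_op;
    sysctl.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
    sysctl.u.scheduler_op.cpupool_id = poolid;
    sysctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CREDIT;
    sysctl.u.scheduler_op.cmd = XEN_SYSCTL_SCHEDOP_putinfo;
    /* Both values must lie within the MIN/MAX bounds defined above. */
    sysctl.u.scheduler_op.u.sched_credit.tslice_ms = 30;
    sysctl.u.scheduler_op.u.sched_credit.ratelimit_us = 1000;
    return do_sysctl(&sysctl);
}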
+ */ +#define XEN_SYSCTL_COVERAGE_read_and_reset 3 + +struct xen_sysctl_coverage_op { + uint32_t cmd; /* XEN_SYSCTL_COVERAGE_* */ + union { + uint32_t total_size; /* OUT */ + XEN_GUEST_HANDLE_64(uint8) raw_info; /* OUT */ + } u; +}; +typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t); + +#define XEN_SYSCTL_PSR_CMT_get_total_rmid 0 +#define XEN_SYSCTL_PSR_CMT_get_l3_upscaling_factor 1 +/* The L3 cache size is returned in KB unit */ +#define XEN_SYSCTL_PSR_CMT_get_l3_cache_size 2 +#define XEN_SYSCTL_PSR_CMT_enabled 3 +#define XEN_SYSCTL_PSR_CMT_get_l3_event_mask 4 +struct xen_sysctl_psr_cmt_op { + uint32_t cmd; /* IN: XEN_SYSCTL_PSR_CMT_* */ + uint32_t flags; /* padding variable, may be extended for future use */ + union { + uint64_t data; /* OUT */ + struct { + uint32_t cpu; /* IN */ + uint32_t rsvd; + } l3_cache; + } u; +}; +typedef struct xen_sysctl_psr_cmt_op xen_sysctl_psr_cmt_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cmt_op_t); + +struct xen_sysctl { + uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 +#define XEN_SYSCTL_coverage_op 20 +#define XEN_SYSCTL_psr_cmt_op 21 + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_readconsole readconsole; + struct xen_sysctl_tbuf_op tbuf_op; + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; + struct xen_sysctl_sched_id sched_id; + struct xen_sysctl_perfc_op perfc_op; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; + struct xen_sysctl_availheap availheap; + struct xen_sysctl_get_pmstat get_pmstat; + struct xen_sysctl_cpu_hotplug cpu_hotplug; + struct xen_sysctl_pm_op pm_op; + struct xen_sysctl_page_offline_op page_offline; + struct xen_sysctl_lockprof_op lockprof_op; + struct xen_sysctl_cpupool_op cpupool_op; + struct xen_sysctl_scheduler_op scheduler_op; + struct xen_sysctl_coverage_op coverage_op; + struct xen_sysctl_psr_cmt_op psr_cmt_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/tmem.h xen-4.6.5/extras/mini-os/include/xen/tmem.h --- xen-4.6.0/extras/mini-os/include/xen/tmem.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/tmem.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,152 @@ +/****************************************************************************** + * tmem.h + * + * Guest OS interface to Xen Transcendent Memory. 
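The xen_sysctl structure that closes sysctl.h above is a plain multiplexer: every call sets cmd, sets interface_version, and fills the matching union member. A hypothetical sketch using XEN_SYSCTL_physinfo, with the same assumed do_sysctl() wrapper as before:

#include <stdint.h>
#include <string.h>

extern int do_sysctl(struct xen_sysctl *sysctl); /* assumed wrapper */

uint64_t total_host_pages(void)
{
    struct xen_sysctl sysctl;

    memset(&sysctl, 0, sizeof(sysctl));
    sysctl.cmd = XEN_SYSCTL_physinfo;
    sysctl.interface_version = XEN_SYSCTL_INTERFACE_VERSION; /* checked by Xen */
    if (do_sysctl(&sysctl))
        return 0;
    return sysctl.u.physinfo.total_pages;
}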
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_TMEM_H__ +#define __XEN_PUBLIC_TMEM_H__ + +#include "xen.h" + +/* version of ABI */ +#define TMEM_SPEC_VERSION 1 + +/* Commands to HYPERVISOR_tmem_op() */ +#define TMEM_CONTROL 0 +#define TMEM_NEW_POOL 1 +#define TMEM_DESTROY_POOL 2 +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define TMEM_NEW_PAGE 3 +#define TMEM_READ 8 +#define TMEM_WRITE 9 +#define TMEM_XCHG 10 +#endif + +/* Privileged commands to HYPERVISOR_tmem_op() */ +#define TMEM_AUTH 101 +#define TMEM_RESTORE_NEW 102 + +/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ +#define TMEMC_THAW 0 +#define TMEMC_FREEZE 1 +#define TMEMC_FLUSH 2 +#define TMEMC_DESTROY 3 +#define TMEMC_LIST 4 +#define TMEMC_SET_WEIGHT 5 +#define TMEMC_SET_CAP 6 +#define TMEMC_SET_COMPRESS 7 +#define TMEMC_QUERY_FREEABLE_MB 8 +#define TMEMC_SAVE_BEGIN 10 +#define TMEMC_SAVE_GET_VERSION 11 +#define TMEMC_SAVE_GET_MAXPOOLS 12 +#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 +#define TMEMC_SAVE_GET_CLIENT_CAP 14 +#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 +#define TMEMC_SAVE_GET_POOL_FLAGS 16 +#define TMEMC_SAVE_GET_POOL_NPAGES 17 +#define TMEMC_SAVE_GET_POOL_UUID 18 +#define TMEMC_SAVE_GET_NEXT_PAGE 19 +#define TMEMC_SAVE_GET_NEXT_INV 20 +#define TMEMC_SAVE_END 21 +#define TMEMC_RESTORE_BEGIN 30 +#define TMEMC_RESTORE_PUT_PAGE 32 +#define TMEMC_RESTORE_FLUSH_PAGE 33 + +/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PRECOMPRESSED 4 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_POOL_PAGESIZE_MASK 0xf +#define TMEM_POOL_VERSION_SHIFT 24 +#define TMEM_POOL_VERSION_MASK 0xff +#define TMEM_POOL_RESERVED_BITS 0x00ffff00 + +/* Bits for client flags (save/restore) */ +#define TMEM_CLIENT_COMPRESS 1 +#define TMEM_CLIENT_FROZEN 2 + +/* Special errno values */ +#define EFROZEN 1000 +#define EEMPTY 1001 + + +#ifndef __ASSEMBLY__ +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +typedef xen_pfn_t tmem_cli_mfn_t; +#endif +typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; +struct tmem_op { + uint32_t cmd; + int32_t pool_id; + union { + struct { + uint64_t uuid[2]; + uint32_t flags; + uint32_t arg1; + } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ + struct { + uint32_t subop; + uint32_t cli_id; + uint32_t arg1; + uint32_t arg2; + uint64_t oid[3]; + 
tmem_cli_va_t buf; + } ctrl; /* for cmd == TMEM_CONTROL */ + struct { + + uint64_t oid[3]; + uint32_t index; + uint32_t tmem_offset; + uint32_t pfn_offset; + uint32_t len; + xen_pfn_t cmfn; /* client machine page frame */ + } gen; /* for all other cmd ("generic") */ + } u; +}; +typedef struct tmem_op tmem_op_t; +DEFINE_XEN_GUEST_HANDLE(tmem_op_t); + +struct tmem_handle { + uint32_t pool_id; + uint32_t index; + uint64_t oid[3]; +}; +#endif + +#endif /* __XEN_PUBLIC_TMEM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/trace.h xen-4.6.5/extras/mini-os/include/xen/trace.h --- xen-4.6.0/extras/mini-os/include/xen/trace.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/trace.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,331 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +#define TRACE_EXTRA_MAX 7 +#define TRACE_EXTRA_SHIFT 28 + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_PV 0x0020f000 /* Xen PV traces */ +#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ +#define TRC_ALL 0x0ffff000 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) +#define TRC_HD_CYCLE_FLAG (1UL<<31) +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) +#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ +#define TRC_HVM_EMUL 0x00084000 /* emulated devices */ + +#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ +#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ + +/* + * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are + * reserved for encoding what scheduler produced the information. The + * actual event is encoded in the last 9 bits. + * + * This means we have 8 scheduling IDs available (which means at most 8 + * schedulers generating events) and, in each scheduler, up to 512 + * different events. + */ +#define TRC_SCHED_ID_BITS 3 +#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS) +#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT) [...] + * MFNs will be at ((unsigned long *)(t_info))+(t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/vcpu.h xen-4.6.5/extras/mini-os/include/xen/vcpu.h --- xen-4.6.0/extras/mini-os/include/xen/vcpu.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/vcpu.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,240 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
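The scheduler-ID encoding documented in trace.h above (3 ID bits plus 9 event bits inside TRC_SCHED_CLASS) can be made concrete with a small self-contained helper; the constants are copied from the header:

#include <stdint.h>

#define TRC_SUBCLS_SHIFT   12
#define TRC_SCHED_CLASS    0x00022000
#define TRC_SCHED_ID_BITS  3
#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS)

/* Pack a scheduler ID (0-7) and event number (0-511) into a trace event ID. */
static uint32_t sched_trace_id(uint32_t sched_id, uint32_t event)
{
    return TRC_SCHED_CLASS | ((sched_id & 0x7) << TRC_SCHED_ID_SHIFT)
                           | (event & 0x1ff);
}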
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long vcpu_op(int cmd, unsigned int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPUOP_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practice to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state). + */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall.
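A hypothetical in-guest use of VCPUOP_get_runstate_info as documented above. HYPERVISOR_vcpu_op() is the hypercall stub supplied by the guest environment (mini-os, whose tree this header is being added to, provides one); it is an assumption here, not part of this header:

/* Returns ns this VCPU has spent blocked since boot, or -1 on error. */
long blocked_ns(unsigned int vcpu)
{
    struct vcpu_runstate_info ri;

    if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, vcpu, &ri) != 0)
        return -1;
    return (long)ri.time[RUNSTATE_blocked];
}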
+ * Notes: + * 1. The registered address may be a virtual address, a physical address or a + * guest handle, depending on the platform. A virtual address or guest + * handle should be registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID information is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. + * This command returns -EINVAL if it is not a valid operation for this VCPU.
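A hypothetical sketch of arming the single-shot timer defined above, again assuming a HYPERVISOR_vcpu_op() stub from the guest environment. The timeout is absolute system time, and VCPU_SSHOTTMR_future makes Xen reject a value already in the past:

#include <stdint.h>

int arm_timeout_in_1ms(unsigned int vcpu, uint64_t now_ns)
{
    struct vcpu_set_singleshot_timer t = {
        .timeout_abs_ns = now_ns + 1000000, /* absolute time, not a delta */
        .flags = VCPU_SSHOTTMR_future,      /* -ETIME if already passed */
    };
    return HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &t);
}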
+ */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); + +#endif /* __XEN_PUBLIC_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/version.h xen-4.6.5/extras/mini-os/include/xen/version.h --- xen-4.6.0/extras/mini-os/include/xen/version.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/version.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,96 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Nguyen Anh Quynh + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +#include "xen.h" + +/* NB. 
All ops return zero on success, except XENVER_{version,pagesize} */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; +typedef struct xen_compile_info xen_compile_info_t; + +#define XENVER_capabilities 3 +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + xen_ulong_t virt_start; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; +typedef struct xen_feature_info xen_feature_info_t; + +/* Declares the features reported by XENVER_get_features. */ +#include "features.h" + +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + +#endif /* __XEN_PUBLIC_VERSION_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/xencomm.h xen-4.6.5/extras/mini-os/include/xen/xencomm.h --- xen-4.6.0/extras/mini-os/include/xen/xencomm.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/xencomm.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 + */ + +#ifndef _XEN_XENCOMM_H_ +#define _XEN_XENCOMM_H_ + +/* A xencomm descriptor is a scatter/gather list containing physical + * addresses corresponding to a virtually contiguous memory area. The + * hypervisor translates these physical addresses to machine addresses to copy + * to and from the virtually contiguous area. 
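In version.h above, XENVER_version returns major:minor packed 16:16 in the hypercall's return value rather than filling the argument buffer; unpacking it is a shift and a mask. A self-contained sketch (the value itself would come from HYPERVISOR_xen_version(XENVER_version, NULL), supplied by the guest's hypercall layer):

#include <stdio.h>

static void print_xen_version(long verword)
{
    long major = (verword >> 16) & 0xffff;
    long minor = verword & 0xffff;
    printf("Xen %ld.%ld\n", major, minor);
}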
+ */ + +#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ +#define XENCOMM_INVALID (~0UL) + +struct xencomm_desc { + uint32_t magic; + uint32_t nr_addrs; /* the number of entries in address[] */ + uint64_t address[0]; +}; + +#endif /* _XEN_XENCOMM_H_ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/xen-compat.h xen-4.6.5/extras/mini-os/include/xen/xen-compat.h --- xen-4.6.0/extras/mini-os/include/xen/xen-compat.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/xen-compat.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,44 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/xen.h xen-4.6.5/extras/mini-os/include/xen/xen.h --- xen-4.6.0/extras/mini-os/include/xen/xen.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/xen.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,899 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include "xen-compat.h" + +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__arm__) || defined (__aarch64__) +#include "arch-arm.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +#if __XEN_INTERFACE_VERSION__ < 0x00040300 +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +#endif +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); +#endif + +/* + * HYPERCALLS + */ + +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* ` } */ + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. 
*/ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +/* ` enum virq { */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 +/* ` } */ + +#define NR_VIRQS 24 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 + * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write.
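A hypothetical in-guest sketch of one MMU_NORMAL_PT_UPDATE request following the encoding just described: the command lives in ptr[1:0] and the machine address of the PTE in ptr[:2]. HYPERVISOR_mmu_update() is the guest's hypercall stub (mini-os provides one), and struct mmu_update and DOMID_SELF are defined further down in this header:

#include <stdint.h>

int write_pte(uint64_t pte_machine_addr, uint64_t new_val)
{
    struct mmu_update req = {
        .ptr = (pte_machine_addr & ~(uint64_t)3) | MMU_NORMAL_PT_UPDATE,
        .val = new_val,
    };
    int done = 0;

    /* One request, for our own pages (FD == PFD == DOMID_SELF). */
    return HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF);
}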
+ * + * There are also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which would end up + * mapping a pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc., we MUST be very diligent in following these rules: + * 1). Start with the top-level page (PGD or, in Xen language, L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3) and middle (PMD + * or L2) levels. + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so the writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there are fewer pagetable levels), + * so instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. If so desired, the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture-defined bits, meaning that + * if this is an x86_64 machine and a four-level page table layout is used, the + * layout of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bit that does not fit the default layout is PAGE_PSE (also called + * PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as the mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit.
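A hypothetical sketch of step 5 above: pin a fully constructed, read-only L4 table, then make it the pagetable base. HYPERVISOR_mmuext_op() is the guest's hypercall stub (assumed here), and struct mmuext_op is defined a little further down in this header:

int install_l4(xen_pfn_t l4_mfn)
{
    struct mmuext_op op;
    int done = 0;

    op.cmd = MMUEXT_PIN_L4_TABLE;  /* hypervisor validates every entry */
    op.arg1.mfn = l4_mfn;
    if (HYPERVISOR_mmuext_op(&op, 1, &done, DOMID_SELF) != 0)
        return -1;

    op.cmd = MMUEXT_NEW_BASEPTR;   /* only a pinned table may become the base */
    op.arg1.mfn = l4_mfn;
    return HYPERVISOR_mmuext_op(&op, 1, &done, DOMID_SELF);
}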
+ * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows setting the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 is on, + * then we are using PAT1; if bit 4, PAT2; if bits 3 and 4, PAT3. + * + * As you can see, Linux's PAT1 translates to PAT4 under Xen, which means that + * a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages MUST use the PAT4 entry, meaning that bit 7 (PAGE_PAT) is + * set. For example, Linux only uses PAT0, PAT1, and PAT3 for + * caching, as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bits 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as follows: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ + +/* + * MMU EXTENDED OPERATIONS + * + * ` enum neg_errnoval + * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * ` unsigned int count, + * ` unsigned int *pdone, + * ` unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments.
Writes back and flushes cache contents + * on all CPUs in the system. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. + */ +/* ` enum mmuext_cmd { */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 +/* ` } */ + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; /* => enum mmuext_cmd */ + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val, + * ` enum uvm_flags flags) + * ` + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val, + * ` enum uvm_flags flags, + * ` domid_t domid) + * ` + * ` @va: The virtual address whose mapping we want to change + * ` @val: The new page table entry, must contain a machine address + * ` @flags: Control TLB flushes + */ +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +/* ` enum uvm_flags { */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +/* ` } */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. 
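+ *
+ * As a minimal illustration (assuming a Linux-style HYPERVISOR_vm_assist
+ * wrapper; no such wrapper is defined in this header), a guest that wants
+ * its PTE writes trapped and emulated would enable the assist once at boot:
+ *
+ *     HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ *                          VMASST_TYPE_writable_pagetables);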
+ */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_multicall(multicall_entry_t call_list[], + * ` uint32_t nr_calls); + * + * NB. The fields are logically the natural register size for this + * architecture. In cases where xen_ulong_t is larger than this then + * any unused bits in the upper portion must be zero. + */ +struct multicall_entry { + xen_ulong_t op, result; + xen_ulong_t args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +/* + * Event channel endpoints per domain (when using the 2-level ABI): + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS +#endif + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. 
*/
+    /*
+     * Current system time:
+     *   system_time +
+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    uint32_t tsc_to_system_mul;
+    int8_t   tsc_shift;
+    int8_t   pad1[3];
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
+    /*
+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+     * a pending notification for a particular VCPU. It is then cleared
+     * by the guest OS /before/ checking for pending work, thus avoiding
+     * a set-and-check race. Note that the mask is only accessed by Xen
+     * on the CPU that is currently hosting the VCPU. This means that the
+     * pending and mask flags can be updated by the guest without special
+     * synchronisation (i.e., no need for the x86 LOCK prefix).
+     * This may seem suboptimal because if the pending flag is set by
+     * a different CPU then an IPI may be scheduled even when the mask
+     * is set. However, note:
+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
+     *     channel mask bits. A 'noisy' event that is continually being
+     *     triggered can be masked at source at this very precise
+     *     granularity.
+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
+     *     reentrant execution: whether for concurrency control, or to
+     *     prevent unbounded stack usage. Whatever the purpose, we expect
+     *     that the mask will be asserted only for short periods at a time,
+     *     and so the likelihood of a 'spurious' IPI is suitably small.
+     * The mask is read before making an event upcall to the guest: a
+     * non-zero mask therefore guarantees that the VCPU will not receive
+     * an upcall activation. The mask is cleared when the VCPU requests
+     * to block: this avoids wakeup-waiting races.
+     */
+    uint8_t evtchn_upcall_pending;
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+    uint8_t evtchn_upcall_mask;
+#else /* XEN_HAVE_PV_UPCALL_MASK */
+    uint8_t pad0;
+#endif /* XEN_HAVE_PV_UPCALL_MASK */
+    xen_ulong_t evtchn_pending_sel;
+    struct arch_vcpu_info arch;
+    struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
+
+/*
+ * `incontents 200 startofday_shared Start-of-day shared data structure
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+    struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
+
+    /*
+     * A domain can create "event channels" on which it can send and receive
+     * asynchronous event notifications. There are three classes of event that
+     * are delivered by this mechanism:
+     *  1. Bi-directional inter- and intra-domain connections. Domains must
+     *     arrange out-of-band to set up a connection (usually by allocating
+     *     an unbound 'listener' port and advertising that via a storage
+     *     service such as xenstore).
+     *  2. Physical interrupts. A domain with suitable hardware-access
+     *     privileges can bind an event-channel port to a physical interrupt
+     *     source.
+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+     *     port to a virtual interrupt source, such as the virtual-timer
+     *     device or the emergency console.
+     *
+     * Event channels are addressed by a "port index". Each channel is
+     * associated with two bits of information:
+     *  1.
PENDING -- notifies the domain that there is a pending notification
+     *     to be processed. This bit is cleared by the guest.
+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+     *     will cause an asynchronous upcall to be scheduled. This bit is only
+     *     updated by the guest. It is read-only within Xen. If a channel
+     *     becomes pending while the channel is masked then the 'edge' is lost
+     *     (i.e., when the channel is unmasked, the guest must manually handle
+     *     pending notifications as no upcall will be scheduled by Xen).
+     *
+     * To expedite scanning of pending notifications, any 0->1 pending
+     * transition on an unmasked channel causes a corresponding bit in a
+     * per-vcpu selector word to be set. Each bit in the selector covers a
+     * 'C long' in the PENDING bitfield array.
+     */
+    xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+    xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+    /*
+     * Wallclock time: updated only by control software. Guests should base
+     * their gettimeofday() syscall on this wallclock-base value.
+     */
+    uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
+    uint32_t wc_sec;     /* Secs  00:00:00 UTC, Jan 1, 1970.  */
+    uint32_t wc_nsec;    /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+
+    struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
+
+/*
+ * `incontents 200 startofday Start-of-day memory layout
+ *
+ *  1. The domain is started within a contiguous virtual-memory region.
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This is the order of bootstrap elements in the initial virtual region:
+ *      a. relocated kernel image
+ *      b. initial ram disk              [mod_start, mod_len]
+ *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
+ *      d. start_info_t structure        [register ESI (x86)]
+ *      e. bootstrap page tables         [pt_base and CR3 (x86)]
+ *      f. bootstrap stack               [register ESP (x86)]
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The initial ram disk may be omitted.
+ *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *     layout for the domain. In particular, the bootstrap virtual-memory
+ *     region is a 1:1 mapping to the first section of the pseudo-physical
+ *     map.
+ *  7. All bootstrap elements are mapped read-writable for the guest OS. The
+ *     only exception is the bootstrap page table, which is mapped read-only.
+ *  8. There is guaranteed to be at least 512kB padding after the final
+ *     bootstrap element. If necessary, the bootstrap virtual region is
+ *     extended by an extra 4MB to ensure this.
+ *
+ * Note: Prior to 25833:bb85bbccb1c9 ("x86/32-on-64 adjust Dom0 initial page
+ * table layout") a bug caused the pt_base (3.e above) and cr3 to not point
+ * to the start of the guest page tables (it was offset by two pages).
+ * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU
+ * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got
+ * allocated in the order: 'first L1', 'first L2', 'first L3', so the page
+ * table base ended up offset by two pages. An initial domain that is 32-bit
+ * and runs under a 64-bit hypervisor should therefore _NOT_ use the two
+ * pages preceding pt_base, and should mark them as reserved/unused.
+ */
+#ifdef XEN_HAVE_PV_GUEST_ENTRY
+struct start_info {
+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
+    char magic[32];             /* "xen-<version>-<platform>".            */
+    unsigned long nr_pages;     /* Total pages allocated to this domain.
*/
+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
+    uint32_t flags;             /* SIF_xxx flags.                         */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
+    uint32_t store_evtchn;      /* Event channel for store communication. */
+    union {
+        struct {
+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
+            uint32_t  evtchn;   /* Event channel for console page.        */
+        } domU;
+        struct {
+            uint32_t info_off;  /* Offset of console_info struct.         */
+            uint32_t info_size; /* Size of console_info struct from start.*/
+        } dom0;
+    } console;
+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module   */
+                                /* (PFN of pre-loaded module if           */
+                                /*  SIF_MOD_START_PFN set in flags).      */
+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
+#define MAX_GUEST_CMDLINE 1024
+    int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* The pfn range here covers both page table and p->m table frames.   */
+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
+};
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn    console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+#endif /* XEN_HAVE_PV_GUEST_ENTRY */
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
+#define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
+#define SIF_PM_MASK     (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+/*
+ * A multiboot module is a package containing modules, very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ *   of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ *   multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ *   mod_start == 0.
+ *
+ * This permits both building it statically and referencing it in a
+ * configuration file, and lets the PV guest easily rebase the addresses to
+ * virtual addresses and at the same time count the number of modules.
+ */
+struct xen_multiboot_mod_list
+{
+    /* Address of first byte of the module */
+    uint32_t mod_start;
+    /* Address of last byte of the module (inclusive) */
+    uint32_t mod_end;
+    /* Address of zero-terminated command line */
+    uint32_t cmdline;
+    /* Unused, must be zero */
+    uint32_t pad;
+};
+/*
+ * `incontents 200 startofday_dom0_console Dom0_console
+ *
+ * The console structure in start_info.console.dom0
+ *
+ * This structure includes a variety of information required to
+ * have a working VGA/VESA console.
+ */
+typedef struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+#define XEN_VGATYPE_EFI_LFB     0x70
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row).
*/ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif + +#ifndef __ASSEMBLY__ +struct xenctl_bitmap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_bits; +}; +#endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/xenoprof.h xen-4.6.5/extras/mini-os/include/xen/xenoprof.h --- xen-4.6.0/extras/mini-os/include/xen/xenoprof.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/xenoprof.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,152 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +#include "xen.h" + +/* + * Commands to HYPERVISOR_xenoprof_op(). + */ +#define XENOPROF_init 0 +#define XENOPROF_reset_active_list 1 +#define XENOPROF_reset_passive_list 2 +#define XENOPROF_set_active 3 +#define XENOPROF_set_passive 4 +#define XENOPROF_reserve_counters 5 +#define XENOPROF_counter 6 +#define XENOPROF_setup_events 7 +#define XENOPROF_enable_virq 8 +#define XENOPROF_start 9 +#define XENOPROF_stop 10 +#define XENOPROF_disable_virq 11 +#define XENOPROF_release_counters 12 +#define XENOPROF_shutdown 13 +#define XENOPROF_get_buffer 14 +#define XENOPROF_set_backtrace 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* PC value that indicates a special code */ +#define XENOPROF_ESCAPE_CODE (~0ULL) +/* Transient events for the xenoprof->oprofile cpu buf */ +#define XENOPROF_TRACE_BEGIN 1 + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +}; +#ifndef __XEN__ +typedef struct xenoprof_buf xenoprof_buf_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif + +struct xenoprof_init { + int32_t num_events; + int32_t is_primary; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +}; +typedef struct xenoprof_init xenoprof_init_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +}; +typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); + +struct xenoprof_counter { + uint32_t ind; + uint64_t count; + uint32_t enabled; + uint32_t event; + uint32_t hypervisor; + uint32_t kernel; + uint32_t user; + uint64_t unit_mask; +}; +typedef struct xenoprof_counter xenoprof_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); + +typedef struct xenoprof_passive { + uint16_t domain_id; + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +} xenoprof_passive_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); + +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/include/xen/xsm/flask_op.h xen-4.6.5/extras/mini-os/include/xen/xsm/flask_op.h --- 
xen-4.6.0/extras/mini-os/include/xen/xsm/flask_op.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xen/xsm/flask_op.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,201 @@ +/* + * This file contains the flask_op hypercall commands and definitions. + * + * Author: George Coker, + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __FLASK_OP_H__ +#define __FLASK_OP_H__ + +#define XEN_FLASK_INTERFACE_VERSION 1 + +struct xen_flask_load { + XEN_GUEST_HANDLE(char) buffer; + uint32_t size; +}; + +struct xen_flask_setenforce { + uint32_t enforcing; +}; + +struct xen_flask_sid_context { + /* IN/OUT: sid to convert to/from string */ + uint32_t sid; + /* IN: size of the context buffer + * OUT: actual size of the output context string + */ + uint32_t size; + XEN_GUEST_HANDLE(char) context; +}; + +struct xen_flask_access { + /* IN: access request */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + uint32_t req; + /* OUT: AVC data */ + uint32_t allowed; + uint32_t audit_allow; + uint32_t audit_deny; + uint32_t seqno; +}; + +struct xen_flask_transition { + /* IN: transition SIDs and class */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + /* OUT: new SID */ + uint32_t newsid; +}; + +struct xen_flask_userlist { + /* IN: starting SID for list */ + uint32_t start_sid; + /* IN: size of user string and output buffer + * OUT: number of SIDs returned */ + uint32_t size; + union { + /* IN: user to enumerate SIDs */ + XEN_GUEST_HANDLE(char) user; + /* OUT: SID list */ + XEN_GUEST_HANDLE(uint32) sids; + } u; +}; + +struct xen_flask_boolean { + /* IN/OUT: numeric identifier for boolean [GET/SET] + * If -1, name will be used and bool_id will be filled in. 
*/ + uint32_t bool_id; + /* OUT: current enforcing value of boolean [GET/SET] */ + uint8_t enforcing; + /* OUT: pending value of boolean [GET/SET] */ + uint8_t pending; + /* IN: new value of boolean [SET] */ + uint8_t new_value; + /* IN: commit new value instead of only setting pending [SET] */ + uint8_t commit; + /* IN: size of boolean name buffer [GET/SET] + * OUT: actual size of name [GET only] */ + uint32_t size; + /* IN: if bool_id is -1, used to find boolean [GET/SET] + * OUT: textual name of boolean [GET only] + */ + XEN_GUEST_HANDLE(char) name; +}; + +struct xen_flask_setavc_threshold { + /* IN */ + uint32_t threshold; +}; + +struct xen_flask_hash_stats { + /* OUT */ + uint32_t entries; + uint32_t buckets_used; + uint32_t buckets_total; + uint32_t max_chain_len; +}; + +struct xen_flask_cache_stats { + /* IN */ + uint32_t cpu; + /* OUT */ + uint32_t lookups; + uint32_t hits; + uint32_t misses; + uint32_t allocations; + uint32_t reclaims; + uint32_t frees; +}; + +struct xen_flask_ocontext { + /* IN */ + uint32_t ocon; + uint32_t sid; + uint64_t low, high; +}; + +struct xen_flask_peersid { + /* IN */ + evtchn_port_t evtchn; + /* OUT */ + uint32_t sid; +}; + +struct xen_flask_relabel { + /* IN */ + uint32_t domid; + uint32_t sid; +}; + +struct xen_flask_op { + uint32_t cmd; +#define FLASK_LOAD 1 +#define FLASK_GETENFORCE 2 +#define FLASK_SETENFORCE 3 +#define FLASK_CONTEXT_TO_SID 4 +#define FLASK_SID_TO_CONTEXT 5 +#define FLASK_ACCESS 6 +#define FLASK_CREATE 7 +#define FLASK_RELABEL 8 +#define FLASK_USER 9 +#define FLASK_POLICYVERS 10 +#define FLASK_GETBOOL 11 +#define FLASK_SETBOOL 12 +#define FLASK_COMMITBOOLS 13 +#define FLASK_MLS 14 +#define FLASK_DISABLE 15 +#define FLASK_GETAVC_THRESHOLD 16 +#define FLASK_SETAVC_THRESHOLD 17 +#define FLASK_AVC_HASHSTATS 18 +#define FLASK_AVC_CACHESTATS 19 +#define FLASK_MEMBER 20 +#define FLASK_ADD_OCONTEXT 21 +#define FLASK_DEL_OCONTEXT 22 +#define FLASK_GET_PEER_SID 23 +#define FLASK_RELABEL_DOMAIN 24 + uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */ + union { + struct xen_flask_load load; + struct xen_flask_setenforce enforce; + /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */ + struct xen_flask_sid_context sid_context; + struct xen_flask_access access; + /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */ + struct xen_flask_transition transition; + struct xen_flask_userlist userlist; + /* FLASK_GETBOOL, FLASK_SETBOOL */ + struct xen_flask_boolean boolean; + struct xen_flask_setavc_threshold setavc_threshold; + struct xen_flask_hash_stats hash_stats; + struct xen_flask_cache_stats cache_stats; + /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */ + struct xen_flask_ocontext ocontext; + struct xen_flask_peersid peersid; + struct xen_flask_relabel relabel; + } u; +}; +typedef struct xen_flask_op xen_flask_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t); + +#endif diff -Nru xen-4.6.0/extras/mini-os/include/xenbus.h xen-4.6.5/extras/mini-os/include/xenbus.h --- xen-4.6.0/extras/mini-os/include/xenbus.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xenbus.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,120 @@ +#ifndef XENBUS_H__ +#define XENBUS_H__ + +#include + +typedef unsigned long xenbus_transaction_t; +#define XBT_NIL ((xenbus_transaction_t)0) + +#ifdef CONFIG_XENBUS +/* Initialize the XenBus system. */ +void init_xenbus(void); +#else +static inline void init_xenbus(void) +{ +} +#endif + +/* Read the value associated with a path. Returns a malloc'd error + string on failure and sets *value to NULL. 
On success, *value is + set to a malloc'd copy of the value. */ +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); + +/* Watch event queue */ +struct xenbus_event { + /* Keep these two as this for xs.c */ + char *path; + char *token; + struct xenbus_event *next; +}; +typedef struct xenbus_event *xenbus_event_queue; + +char *xenbus_watch_path_token(xenbus_transaction_t xbt, const char *path, const char *token, xenbus_event_queue *events); +char *xenbus_unwatch_path_token(xenbus_transaction_t xbt, const char *path, const char *token); +extern struct wait_queue_head xenbus_watch_queue; +void xenbus_wait_for_watch(xenbus_event_queue *queue); +char **xenbus_wait_for_watch_return(xenbus_event_queue *queue); +char* xenbus_wait_for_value(const char *path, const char *value, xenbus_event_queue *queue); +char *xenbus_wait_for_state_change(const char* path, XenbusState *state, xenbus_event_queue *queue); +char *xenbus_switch_state(xenbus_transaction_t xbt, const char* path, XenbusState state); + +/* When no token is provided, use a global queue. */ +#define XENBUS_WATCH_PATH_TOKEN "xenbus_watch_path" +extern xenbus_event_queue xenbus_events; +#define xenbus_watch_path(xbt, path) xenbus_watch_path_token(xbt, path, XENBUS_WATCH_PATH_TOKEN, NULL) +#define xenbus_unwatch_path(xbt, path) xenbus_unwatch_path_token(xbt, path, XENBUS_WATCH_PATH_TOKEN) + + +/* Associates a value with a path. Returns a malloc'd error string on + failure. */ +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value); + +struct write_req { + const void *data; + unsigned len; +}; + +/* Send a message to xenbus, in the same fashion as xb_write, and + block waiting for a reply. The reply is malloced and should be + freed by the caller. */ +struct xsd_sockmsg * +xenbus_msg_reply(int type, + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs); + +/* Removes the value associated with a path. Returns a malloc'd error + string on failure. */ +char *xenbus_rm(xenbus_transaction_t xbt, const char *path); + +/* List the contents of a directory. Returns a malloc'd error string + on failure and sets *contents to NULL. On success, *contents is + set to a malloc'd array of pointers to malloc'd strings. The array + is NULL terminated. May block. */ +char *xenbus_ls(xenbus_transaction_t xbt, const char *prefix, char ***contents); + +/* Reads permissions associated with a path. Returns a malloc'd error + string on failure and sets *value to NULL. On success, *value is + set to a malloc'd copy of the value. */ +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value); + +/* Sets the permissions associated with a path. Returns a malloc'd + error string on failure. */ +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm); + +/* Start a xenbus transaction. Returns the transaction in xbt on + success or a malloc'd error string otherwise. */ +char *xenbus_transaction_start(xenbus_transaction_t *xbt); + +/* End a xenbus transaction. Returns a malloc'd error string if it + fails. abort says whether the transaction should be aborted. + Returns 1 in *retry iff the transaction should be retried. */ +char *xenbus_transaction_end(xenbus_transaction_t, int abort, + int *retry); + +/* Read path and parse it as an integer. Returns -1 on error. */ +int xenbus_read_integer(const char *path); + +/* Read path and parse it as 16 byte uuid. 
Returns 1 if + * read and parsing were successful, 0 if not */ +int xenbus_read_uuid(const char* path, unsigned char uuid[16]); + +/* Contraction of snprintf and xenbus_write(path/node). */ +char* xenbus_printf(xenbus_transaction_t xbt, + const char* node, const char* path, + const char* fmt, ...) + __attribute__((__format__(printf, 4, 5))); + +/* Utility function to figure out our domain id */ +domid_t xenbus_get_self_id(void); + +#ifdef CONFIG_XENBUS +/* Reset the XenBus system. */ +void fini_xenbus(void); +#else +static inline void fini_xenbus(void) +{ +} +#endif + +#endif /* XENBUS_H__ */ diff -Nru xen-4.6.0/extras/mini-os/include/xmalloc.h xen-4.6.5/extras/mini-os/include/xmalloc.h --- xen-4.6.0/extras/mini-os/include/xmalloc.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/include/xmalloc.h 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,44 @@ +#ifndef __XMALLOC_H__ +#define __XMALLOC_H__ + +#ifdef HAVE_LIBC + +#include +#include +/* Allocate space for typed object. */ +#define _xmalloc(size, align) memalign(align, size) +#define xfree(ptr) free(ptr) + +#else + +#include + +#define DEFAULT_ALIGN (sizeof(unsigned long)) + +extern void *malloc(size_t size); +extern void *realloc(void *ptr, size_t size); +extern void free(void *ptr); + +/* Free memory from any xmalloc*() call. */ +extern void xfree(const void *); + +/* Underlying functions */ +extern void *_xmalloc(size_t size, size_t align); + +#endif + +static inline void *_xmalloc_array(size_t size, size_t align, size_t num) +{ + /* Check for overflow. */ + if (size && num > UINT_MAX / size) + return NULL; + return _xmalloc(size * num, align); +} + +/* Allocate space for typed object. */ +#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type))) + +/* Allocate space for array of typed objects. */ +#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num)) + +#endif /* __XMALLOC_H__ */ diff -Nru xen-4.6.0/extras/mini-os/kernel.c xen-4.6.5/extras/mini-os/kernel.c --- xen-4.6.0/extras/mini-os/kernel.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/kernel.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,198 @@ +/****************************************************************************** + * kernel.c + * + * Assorted crap goes here, including the initial C entry point, jumped at + * from head.S. + * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * Copyright (c) 2006, Robert Kaiser, FH Wiesbaden + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32];
+
+void setup_xen_features(void)
+{
+    xen_feature_info_t fi;
+    int i, j;
+
+    for (i = 0; i < XENFEAT_NR_SUBMAPS; i++)
+    {
+        fi.submap_idx = i;
+        if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+            break;
+
+        for (j=0; j<32; j++)
+            xen_features[i*32+j] = !!(fi.submap & 1<<j);
+    }
+}
+
+unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,         /* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,                    /* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P,                        /* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D,                        /* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P,                        /* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,      /* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U,                        /* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U,                        /* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P,                        /* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,      /* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L,                        /* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L,                        /* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C,                        /* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,     /* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,     /* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,     /* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,     /* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};    /* 240-255 */
+#endif
diff -Nru xen-4.6.0/extras/mini-os/lib/math.c xen-4.6.5/extras/mini-os/lib/math.c
--- xen-4.6.0/extras/mini-os/lib/math.c 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/lib/math.c 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,426 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: math.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes:
+ *
+ *        Date: Aug 2003
+ *
+ * Environment: Xen Minimal OS
+ * Description: Library functions for 64bit arith and other
+ *              from freebsd, files in sys/libkern/ (qdivrem.c, etc)
+ *
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * +*/ + +#include +#include +#include + +/* XXX RN: Yuck hardcoded endianess :) */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 + +/* + * From + * @(#)quad.h 8.1 (Berkeley) 6/4/93 + */ + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + quad_t q; /* as a (signed) quad */ + quad_t uq; /* as an unsigned quad */ + int32_t sl[2]; /* as two signed longs */ + uint32_t ul[2]; /* as two unsigned longs */ +}; + +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in an quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#ifndef HAVE_LIBC +#define CHAR_BIT 8 /* number of bits in a char */ +#endif +#define QUAD_BITS (sizeof(quad_t) * CHAR_BIT) +#define LONG_BITS (sizeof(int32_t) * CHAR_BIT) +#define HALF_BITS (sizeof(int32_t) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of int32_t, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be uint32_t.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(int32_t)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + + +/* + * From + * qdivrem.c + */ + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1UL << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((uint32_t)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: */ +typedef uint16_t digit; + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 
+ */ +static void +shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within uint32_t. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u_quad_t +__qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + uint32_t qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + uint32_t rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] 
change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + uint32_t nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * From + * divdi3.c + */ + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +quad_t +__divdi3(quad_t a, quad_t b) +{ + u_quad_t ua, ub, uq; + int neg; + + if (a < 0) + ua = -(u_quad_t)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u_quad_t)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (u_quad_t *)0); + return (neg ? -uq : uq); +} + +/* + * From + * udivdi3.c + */ + +/* + * Divide two unsigned quads. + */ +u_quad_t +__udivdi3(u_quad_t a, u_quad_t b) +{ + return (__qdivrem(a, b, (u_quad_t *)0)); +} + +/* + * From + * umoddi3.c + */ + +/* + * Return remainder after dividing two unsigned quads. + */ +u_quad_t +__umoddi3(u_quad_t a, u_quad_t b) +{ + u_quad_t r; + + (void)__qdivrem(a, b, &r); + return (r); +} + +/* + * From + * moddi3.c + */ + +/* + * Return remainder after dividing two signed quads. + * + * XXX + * If -1/2 should produce -1 on this machine, this code is wrong. + */ +quad_t +__moddi3(quad_t a, quad_t b) +{ + u_quad_t ua, ub, ur; + int neg; + + if (a < 0) + ua = -(u_quad_t)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u_quad_t)b; + else + ub = b; + (void)__qdivrem(ua, ub, &ur); + return (neg ? 
-ur : ur); +} diff -Nru xen-4.6.0/extras/mini-os/lib/printf.c xen-4.6.5/extras/mini-os/lib/printf.c --- xen-4.6.0/extras/mini-os/lib/printf.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/printf.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,788 @@ +/* + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: printf.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Aug 2003, Aug 2005 + * + * Environment: Xen Minimal OS + * Description: Library functions for printing + * (Linux port, mainly lib/vsprintf.c) + * + **************************************************************************** + */ + +/* + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */ +/* + * Wirzenius wrote this portably, Torvalds fucked it up :-) + */ + +/* + * Fri Jul 13 2001 Crutcher Dunnavant + * - changed to provide snprintf and vsnprintf functions + * So Feb 1 16:51:32 CET 2004 Juergen Quade + * - scnprintf and vscnprintf + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if !defined HAVE_LIBC + +#include +#include +#include +#include +#include +#include +#include + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? 
*cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtol - convert a string to a signed long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base) +{ + unsigned long long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp) + ? toupper(*cp) : *cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtoll - convert a string to a signed long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long long simple_strtoll(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoull(cp+1,endp,base); + return simple_strtoull(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & LARGE) ? large_digits : small_digits; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return buf; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else + { + /* XXX KAF: force unsigned mod and div. 
*/ + unsigned long long num2=(unsigned long long)num; + unsigned int base2=(unsigned int)base; + while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; } + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +/** +* vsnprintf - Format a string and place it in a buffer +* @buf: The buffer to place the result into +* @size: The size of the buffer, including the trailing null space +* @fmt: The format string to use +* @args: Arguments for the format string +* +* Call this function if you are already dealing with a va_list. +* You probably want snprintf instead. + */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z' || *fmt == 'z') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } else if (qualifier == 'z') { + qualifier = 'Z'; + } + } + if (*fmt == 'q') { + qualifier = 'L'; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 
's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} + +/** + * vsprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @args: Arguments for the format string + * + * Call this function if you are already dealing with a va_list. + * You probably want sprintf instead. + */ +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args); +} + + +/** + * sprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int sprintf(char * buf, const char *fmt, ...) 
+{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +int vsscanf(const char * buf, const char * fmt, va_list args) +{ + const char *str = buf; + char *next; + char digit; + int num = 0; + int qualifier; + int base; + int field_width; + int is_sign = 0; + + while(*fmt && *str) { + /* skip any white space in format */ + /* white space in format matchs any amount of + * white space, including none, in the input. + */ + if (isspace(*fmt)) { + while (isspace(*fmt)) + ++fmt; + while (isspace(*str)) + ++str; + } + + /* anything that is not a conversion must match exactly */ + if (*fmt != '%' && *fmt) { + if (*fmt++ != *str++) + break; + continue; + } + + if (!*fmt) + break; + ++fmt; + + /* skip this conversion. + * advance both strings to next white space + */ + if (*fmt == '*') { + while (!isspace(*fmt) && *fmt) + fmt++; + while (!isspace(*str) && *str) + str++; + continue; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + + /* get conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z') { + qualifier = *fmt++; + if (unlikely(qualifier == *fmt)) { + if (qualifier == 'h') { + qualifier = 'H'; + fmt++; + } else if (qualifier == 'l') { + qualifier = 'L'; + fmt++; + } + } + } + base = 10; + is_sign = 0; + + if (!*fmt || !*str) + break; + + switch(*fmt++) { + case 'c': + { + char *s = (char *) va_arg(args,char*); + if (field_width == -1) + field_width = 1; + do { + *s++ = *str++; + } while (--field_width > 0 && *str); + num++; + } + continue; + case 's': + { + char *s = (char *) va_arg(args, char *); + if(field_width == -1) + field_width = INT_MAX; + /* first, skip leading white space in buffer */ + while (isspace(*str)) + str++; + + /* now copy until next white space */ + while (*str && !isspace(*str) && field_width--) { + *s++ = *str++; + } + *s = '\0'; + num++; + } + continue; + case 'n': + /* return number of characters read so far */ + { + int *i = (int *)va_arg(args,int*); + *i = str - buf; + } + continue; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + case 'i': + base = 0; + case 'd': + is_sign = 1; + case 'u': + break; + case '%': + /* looking for '%' in str */ + if (*str++ != '%') + return num; + continue; + default: + /* invalid format; stop here */ + return num; + } + + /* have some sort of integer conversion. + * first, skip white space in buffer. 
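*/

/* Aside (editor's sketch, not from the patch): this whitespace skip is what
 * makes scanf-style integer conversions ignore leading blanks in the input
 * buffer. A minimal hosted-C illustration of the behaviour: */

#include <stdio.h>

int main(void)
{
    int a;
    unsigned b;

    /* both conversions skip the leading white space, so a == -42, b == 31 */
    if (sscanf("   -42   0x1f", "%d %x", &a, &b) == 2)
        printf("a=%d b=%u\n", a, b);
    return 0;
}

/* vsscanf() continues: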
+ */ + while (isspace(*str)) + str++; + + digit = *str; + if (is_sign && digit == '-') + digit = *(str + 1); + + if (!digit + || (base == 16 && !isxdigit(digit)) + || (base == 10 && !isdigit(digit)) + || (base == 8 && (!isdigit(digit) || digit > '7')) + || (base == 0 && !isdigit(digit))) + break; + + switch(qualifier) { + case 'H': /* that's 'hh' in format */ + if (is_sign) { + signed char *s = (signed char *) va_arg(args,signed char *); + *s = (signed char) simple_strtol(str,&next,base); + } else { + unsigned char *s = (unsigned char *) va_arg(args, unsigned char *); + *s = (unsigned char) simple_strtoul(str, &next, base); + } + break; + case 'h': + if (is_sign) { + short *s = (short *) va_arg(args,short *); + *s = (short) simple_strtol(str,&next,base); + } else { + unsigned short *s = (unsigned short *) va_arg(args, unsigned short *); + *s = (unsigned short) simple_strtoul(str, &next, base); + } + break; + case 'l': + if (is_sign) { + long *l = (long *) va_arg(args,long *); + *l = simple_strtol(str,&next,base); + } else { + unsigned long *l = (unsigned long*) va_arg(args,unsigned long*); + *l = simple_strtoul(str,&next,base); + } + break; + case 'L': + if (is_sign) { + long long *l = (long long*) va_arg(args,long long *); + *l = simple_strtoll(str,&next,base); + } else { + unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*); + *l = simple_strtoull(str,&next,base); + } + break; + case 'Z': + case 'z': + { + size_t *s = (size_t*) va_arg(args,size_t*); + *s = (size_t) simple_strtoul(str,&next,base); + } + break; + default: + if (is_sign) { + int *i = (int *) va_arg(args, int*); + *i = (int) simple_strtol(str,&next,base); + } else { + unsigned int *i = (unsigned int*) va_arg(args, unsigned int*); + *i = (unsigned int) simple_strtoul(str,&next,base); + } + break; + } + num++; + + if (!next) + break; + str = next; + } + return num; +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char * buf, const char * fmt, ...) 
+{ + va_list args; + int i; + + va_start(args,fmt); + i = vsscanf(buf,fmt,args); + va_end(args); + return i; +} + +#endif diff -Nru xen-4.6.0/extras/mini-os/lib/stack_chk_fail.c xen-4.6.5/extras/mini-os/lib/stack_chk_fail.c --- xen-4.6.0/extras/mini-os/lib/stack_chk_fail.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/stack_chk_fail.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,8 @@ +#include +#include + +void __stack_chk_fail(void) +{ + printk("stack smashing detected\n"); + do_exit(); +} diff -Nru xen-4.6.0/extras/mini-os/lib/string.c xen-4.6.5/extras/mini-os/lib/string.c --- xen-4.6.0/extras/mini-os/lib/string.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/string.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,228 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: string.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library function for string and memory manipulation + * Origin unknown + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include + +/* newlib defines ffs but not ffsll or ffsl */ +int __ffsti2 (long long int lli) +{ + int i, num, t, tmpint, len; + + num = sizeof(long long int) / sizeof(int); + if (num == 1) return (ffs((int) lli)); + len = sizeof(int) * 8; + + for (i = 0; i < num; i++) { + tmpint = (int) (((lli >> len) << len) ^ lli); + + t = ffs(tmpint); + if (t) + return (t + i * len); + lli = lli >> len; + } + return 0; +} + +int __ffsdi2 (long int li) +{ + return __ffsti2 ((long long int) li); +} + +int ffsl (long int li) +{ + return __ffsti2 ((long long int) li); +} + +int ffsll (long long int lli) +{ + return __ffsti2 (lli); +} + +#if !defined HAVE_LIBC + +#include +#include +#include +#include + +int memcmp(const void * cs,const void * ct,size_t count) +{ + const unsigned char *su1, *su2; + signed char res = 0; + + for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +void * memcpy(void * dest,const void *src,size_t count) +{ + char *tmp = (char *) dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + + return dest; +} + +int strncmp(const char * cs,const char * ct,size_t count) +{ + register signed char __res = 0; + + while (count) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + count--; + } + + return __res; +} + +int strcmp(const char * cs,const char * ct) +{ + register signed char __res; + + while (1) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + } + + return __res; +} + +char * strcpy(char * dest,const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +char * strncpy(char * dest,const char *src,size_t count) +{ + char *tmp = dest; + + while (count-- && (*dest++ = *src++) != '\0') + /* nothing */; + + return tmp; +} + +void * memset(void * s,int c,size_t count) +{ + char *xs = (char *) s; + + while (count--) + *xs++ = c; + + return s; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* 
nothing */; + return sc - s; +} + + +char * strcat(char * dest, const char * src) +{ + char *tmp = dest; + + while (*dest) + dest++; + + while ((*dest++ = *src++) != '\0'); + + return tmp; +} + +size_t strlen(const char * s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * strchr(const char * s, int c) +{ + for(; *s != (char) c; ++s) + if (*s == '\0') + return NULL; + return (char *)s; +} + +char * strrchr(const char * s, int c) +{ + const char *res = NULL; + for(; *s != '\0'; ++s) + if (*s == (char) c) + res = s; + return (char *)res; +} + +char * strstr(const char * s1,const char * s2) +{ + int l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *) s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1,s2,l2)) + return (char *) s1; + s1++; + } + return NULL; +} + +char *strdup(const char *x) +{ + int l = strlen(x); + char *res = malloc(l + 1); + if (!res) return NULL; + memcpy(res, x, l + 1); + return res; +} + +int ffs(int i) +{ + int c = 1; + + do { + if (i & 1) + return (c); + i = i >> 1; + c++; + } while (i); + return 0; +} + +#endif diff -Nru xen-4.6.0/extras/mini-os/lib/sys.c xen-4.6.5/extras/mini-os/lib/sys.c --- xen-4.6.0/extras/mini-os/lib/sys.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/sys.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,1552 @@ +/* + * POSIX-compatible libc layer + * + * Samuel Thibault , October 2007 + * + * Provides the UNIXish part of the standard libc function. + * + * Relatively straight-forward: just multiplex the file descriptor operations + * among the various file types (console, FS, network, ...) + */ + +//#define LIBC_VERBOSE +//#define LIBC_DEBUG + +#ifdef LIBC_DEBUG +#define DEBUG(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define DEBUG(fmt,...) +#endif + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LWIP +#include +#endif + +#define debug(fmt, ...) \ + +#define print_unsupported(fmt, ...) 
\ + printk("Unsupported function "fmt" called in Mini-OS kernel\n", ## __VA_ARGS__); + +/* Crash on function call */ +#define unsupported_function_crash(function) \ + int __unsup_##function(void) asm(#function); \ + int __unsup_##function(void) \ + { \ + print_unsupported(#function); \ + do_exit(); \ + } + +/* Log and err out on function call */ +#define unsupported_function_log(type, function, ret) \ + type __unsup_##function(void) asm(#function); \ + type __unsup_##function(void) \ + { \ + print_unsupported(#function); \ + errno = ENOSYS; \ + return ret; \ + } + +/* Err out on function call */ +#define unsupported_function(type, function, ret) \ + type __unsup_##function(void) asm(#function); \ + type __unsup_##function(void) \ + { \ + errno = ENOSYS; \ + return ret; \ + } + +#define NOFILE 32 +extern void minios_interface_close_fd(int fd); +extern void minios_evtchn_close_fd(int fd); +extern void minios_gnttab_close_fd(int fd); + +pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER; +struct file files[NOFILE] = { + { .type = FTYPE_CONSOLE }, /* stdin */ + { .type = FTYPE_CONSOLE }, /* stdout */ + { .type = FTYPE_CONSOLE }, /* stderr */ +}; + +DECLARE_WAIT_QUEUE_HEAD(event_queue); + +int alloc_fd(enum fd_type type) +{ + int i; + pthread_mutex_lock(&fd_lock); + for (i=0; i 0; i--) + if (files[i].type != FTYPE_NONE) + close(i); + pthread_mutex_unlock(&fd_lock); +} + +int dup2(int oldfd, int newfd) +{ + pthread_mutex_lock(&fd_lock); + if (files[newfd].type != FTYPE_NONE) + close(newfd); + // XXX: this is a bit bogus, as we are supposed to share the offset etc + files[newfd] = files[oldfd]; + pthread_mutex_unlock(&fd_lock); + return 0; +} + +pid_t getpid(void) +{ + return 1; +} + +pid_t getppid(void) +{ + return 1; +} + +pid_t setsid(void) +{ + return 1; +} + +char *getcwd(char *buf, size_t size) +{ + snprintf(buf, size, "/"); + return buf; +} + +#define LOG_PATH "/var/log/" +#define SAVE_PATH "/var/lib/xen" +#define SAVE_CONSOLE 1 +#define RESTORE_CONSOLE 2 + +int mkdir(const char *pathname, mode_t mode) +{ + errno = EIO; + return -1; +} + +#ifdef CONFIG_CONSFRONT +int posix_openpt(int flags) +{ + struct consfront_dev *dev; + + /* Ignore flags */ + + dev = init_consfront(NULL); + dev->fd = alloc_fd(FTYPE_CONSOLE); + files[dev->fd].cons.dev = dev; + + printk("fd(%d) = posix_openpt\n", dev->fd); + return(dev->fd); +} + +int open_savefile(const char *path, int save) +{ + struct consfront_dev *dev; + char nodename[64]; + + snprintf(nodename, sizeof(nodename), "device/console/%d", save ? SAVE_CONSOLE : RESTORE_CONSOLE); + + dev = init_consfront(nodename); + dev->fd = alloc_fd(FTYPE_SAVEFILE); + files[dev->fd].cons.dev = dev; + + printk("fd(%d) = open_savefile\n", dev->fd); + return(dev->fd); +} +#else +int posix_openpt(int flags) +{ + errno = EIO; + return -1; +} +int open_savefile(const char *path, int save) +{ + errno = EIO; + return -1; +} +#endif + +int open(const char *pathname, int flags, ...) +{ + int fd; + /* Ugly, but fine. 
*/ + if (!strncmp(pathname,LOG_PATH,strlen(LOG_PATH))) { + fd = alloc_fd(FTYPE_CONSOLE); + printk("open(%s) -> %d\n", pathname, fd); + return fd; + } + if (!strncmp(pathname, "/dev/mem", strlen("/dev/mem"))) { + fd = alloc_fd(FTYPE_MEM); + printk("open(/dev/mem) -> %d\n", fd); + return fd; + } + if (!strncmp(pathname, "/dev/ptmx", strlen("/dev/ptmx"))) + return posix_openpt(flags); + if (!strncmp(pathname,SAVE_PATH,strlen(SAVE_PATH))) + return open_savefile(pathname, flags & O_WRONLY); + errno = EIO; + return -1; +} + +int isatty(int fd) +{ + return files[fd].type == FTYPE_CONSOLE; +} + +int read(int fd, void *buf, size_t nbytes) +{ + switch (files[fd].type) { + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: { + int ret; + DEFINE_WAIT(w); + while(1) { + add_waiter(w, console_queue); + ret = xencons_ring_recv(files[fd].cons.dev, buf, nbytes); + if (ret) + break; + schedule(); + } + remove_waiter(w, console_queue); + return ret; + } +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + return lwip_read(files[fd].socket.fd, buf, nbytes); +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: { + ssize_t ret; + ret = netfront_receive(files[fd].tap.dev, buf, nbytes); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret; + } +#endif +#ifdef CONFIG_KBDFRONT + case FTYPE_KBD: { + int ret, n; + n = nbytes / sizeof(union xenkbd_in_event); + ret = kbdfront_receive(files[fd].kbd.dev, buf, n); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret * sizeof(union xenkbd_in_event); + } +#endif +#ifdef CONFIG_FBFRONT + case FTYPE_FB: { + int ret, n; + n = nbytes / sizeof(union xenfb_in_event); + ret = fbfront_receive(files[fd].fb.dev, buf, n); + if (ret <= 0) { + errno = EAGAIN; + return -1; + } + return ret * sizeof(union xenfb_in_event); + } +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: { + return blkfront_posix_read(fd, buf, nbytes); + } +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: { + return tpmfront_posix_read(fd, buf, nbytes); + } +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: { + return tpm_tis_posix_read(fd, buf, nbytes); + } +#endif + default: + break; + } + printk("read(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +int write(int fd, const void *buf, size_t nbytes) +{ + switch (files[fd].type) { + case FTYPE_SAVEFILE: { + int ret = 0, tot = nbytes; + while (nbytes > 0) { + ret = xencons_ring_send(files[fd].cons.dev, (char *)buf, nbytes); + nbytes -= ret; + buf = (char *)buf + ret; + } + return tot - nbytes; + } + case FTYPE_CONSOLE: + console_print(files[fd].cons.dev, (char *)buf, nbytes); + return nbytes; +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + return lwip_write(files[fd].socket.fd, (void*) buf, nbytes); +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: + netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); + return nbytes; +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_write(fd, buf, nbytes); +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + return tpmfront_posix_write(fd, buf, nbytes); +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + return tpm_tis_posix_write(fd, buf, nbytes); +#endif + default: + break; + } + printk("write(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +off_t lseek(int fd, off_t offset, int whence) +{ + off_t* target = NULL; + switch(files[fd].type) { +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + target = &files[fd].blk.offset; + break; +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + target = &files[fd].tpmfront.offset; + break; +#endif +#ifdef CONFIG_TPM_TIS + case 
FTYPE_TPM_TIS: + target = &files[fd].tpm_tis.offset; + break; +#endif + case FTYPE_FILE: + target = &files[fd].file.offset; + break; + default: + /* Not implemented for this filetype */ + errno = ESPIPE; + return (off_t) -1; + } + + switch (whence) { + case SEEK_SET: + *target = offset; + break; + case SEEK_CUR: + *target += offset; + break; + case SEEK_END: + { + struct stat st; + int ret; + ret = fstat(fd, &st); + if (ret) + return -1; + *target = st.st_size + offset; + break; + } + default: + errno = EINVAL; + return -1; + } + return *target; +} + +int fsync(int fd) { + errno = EBADF; + return -1; +} + +int close(int fd) +{ + printk("close(%d)\n", fd); + switch (files[fd].type) { + default: + files[fd].type = FTYPE_NONE; + return 0; +#ifdef CONFIG_XENBUS + case FTYPE_XENBUS: + xs_daemon_close((void*)(intptr_t) fd); + return 0; +#endif +#ifdef HAVE_LWIP + case FTYPE_SOCKET: { + int res = lwip_close(files[fd].socket.fd); + files[fd].type = FTYPE_NONE; + return res; + } +#endif +#ifdef CONFIG_XC + case FTYPE_XC: + minios_interface_close_fd(fd); + return 0; + case FTYPE_EVTCHN: + minios_evtchn_close_fd(fd); + return 0; + case FTYPE_GNTMAP: + minios_gnttab_close_fd(fd); + return 0; +#endif +#ifdef CONFIG_NETFRONT + case FTYPE_TAP: + shutdown_netfront(files[fd].tap.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + shutdown_blkfront(files[fd].blk.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + shutdown_tpmfront(files[fd].tpmfront.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + shutdown_tpm_tis(files[fd].tpm_tis.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_KBDFRONT + case FTYPE_KBD: + shutdown_kbdfront(files[fd].kbd.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_FBFRONT + case FTYPE_FB: + shutdown_fbfront(files[fd].fb.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif +#ifdef CONFIG_CONSFRONT + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: + fini_console(files[fd].cons.dev); + files[fd].type = FTYPE_NONE; + return 0; +#endif + case FTYPE_NONE: + break; + } + printk("close(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +static void init_stat(struct stat *buf) +{ + memset(buf, 0, sizeof(*buf)); + buf->st_dev = 0; + buf->st_ino = 0; + buf->st_nlink = 1; + buf->st_rdev = 0; + buf->st_blksize = 4096; + buf->st_blocks = 0; +} + +int stat(const char *path, struct stat *buf) +{ + errno = EIO; + return -1; +} + +int fstat(int fd, struct stat *buf) +{ + init_stat(buf); + switch (files[fd].type) { + case FTYPE_SAVEFILE: + case FTYPE_CONSOLE: + case FTYPE_SOCKET: { + if (files[fd].type == FTYPE_CONSOLE) + buf->st_mode = S_IFCHR|S_IRUSR|S_IWUSR; + else if (files[fd].type == FTYPE_SOCKET) + buf->st_mode = S_IFSOCK|S_IRUSR|S_IWUSR; + else if (files[fd].type == FTYPE_SAVEFILE) + buf->st_mode = S_IFREG|S_IRUSR|S_IWUSR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = 0; + buf->st_atime = + buf->st_mtime = + buf->st_ctime = time(NULL); + return 0; + } +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_fstat(fd, buf); +#endif +#ifdef CONFIG_TPMFRONT + case FTYPE_TPMFRONT: + return tpmfront_posix_fstat(fd, buf); +#endif +#ifdef CONFIG_TPM_TIS + case FTYPE_TPM_TIS: + return tpm_tis_posix_fstat(fd, buf); +#endif + default: + break; + } + + printk("statf(%d): Bad descriptor\n", fd); + errno = EBADF; + return -1; +} + +int ftruncate(int fd, off_t length) +{ + errno 
= EBADF; + return -1; +} + +int remove(const char *pathname) +{ + errno = EIO; + return -1; +} + +int unlink(const char *pathname) +{ + return remove(pathname); +} + +int rmdir(const char *pathname) +{ + return remove(pathname); +} + +int fcntl(int fd, int cmd, ...) +{ + long arg; + va_list ap; + va_start(ap, cmd); + arg = va_arg(ap, long); + va_end(ap); + + switch (cmd) { +#ifdef HAVE_LWIP + case F_SETFL: + if (files[fd].type == FTYPE_SOCKET && !(arg & ~O_NONBLOCK)) { + /* Only flag supported: non-blocking mode */ + uint32_t nblock = !!(arg & O_NONBLOCK); + return lwip_ioctl(files[fd].socket.fd, FIONBIO, &nblock); + } + /* Fallthrough */ +#endif + default: + printk("fcntl(%d, %d, %lx/%lo)\n", fd, cmd, arg, arg); + errno = ENOSYS; + return -1; + } +} + +DIR *opendir(const char *name) +{ + DIR *ret; + ret = malloc(sizeof(*ret)); + ret->name = strdup(name); + ret->offset = 0; + ret->entries = NULL; + ret->curentry = -1; + ret->nbentries = 0; + ret->has_more = 1; + return ret; +} + +struct dirent *readdir(DIR *dir) +{ + return NULL; +} + +int closedir(DIR *dir) +{ + int i; + for (i=0; i<dir->nbentries; i++) + free(dir->entries[i]); + free(dir->entries); + free(dir->name); + free(dir); + return 0; +} + +/* We assume that only the main thread calls select(). */ + +#if defined(LIBC_DEBUG) || defined(LIBC_VERBOSE) +static const char file_types[] = { + [FTYPE_NONE] = 'N', + [FTYPE_CONSOLE] = 'C', + [FTYPE_XENBUS] = 'S', + [FTYPE_XC] = 'X', + [FTYPE_EVTCHN] = 'E', + [FTYPE_SOCKET] = 's', + [FTYPE_TAP] = 'T', + [FTYPE_BLK] = 'B', + [FTYPE_KBD] = 'K', + [FTYPE_FB] = 'G', +}; +#endif +#ifdef LIBC_DEBUG +static void dump_set(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) +{ + int i, comma; +#define printfds(set) do {\ + comma = 0; \ + for (i = 0; i < nfds; i++) { \ + if (FD_ISSET(i, set)) { \ + if (comma) \ + printk(", "); \ + printk("%d(%c)", i, file_types[files[i].type]); \ + comma = 1; \ + } \ + } \ +} while (0) + + printk("["); + if (readfds) + printfds(readfds); + printk("], ["); + if (writefds) + printfds(writefds); + printk("], ["); + if (exceptfds) + printfds(exceptfds); + printk("], "); + if (timeout) + printk("{ %ld, %ld }", timeout->tv_sec, timeout->tv_usec); +} +#else +#define dump_set(nfds, readfds, writefds, exceptfds, timeout) +#endif + +#ifdef LIBC_DEBUG +static void dump_pollfds(struct pollfd *pfd, int nfds, int timeout) +{ + int i, comma, fd; + + printk("["); + comma = 0; + for (i = 0; i < nfds; i++) { + fd = pfd[i].fd; + if (comma) + printk(", "); + printk("%d(%c)/%02x", fd, file_types[files[fd].type], + pfd[i].events); + comma = 1; + } + printk("]"); + + printk(", %d, %d", nfds, timeout); +} +#else +#define dump_pollfds(pfds, nfds, timeout) +#endif + +/* Just poll without blocking */ +static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds) +{ + int i, n = 0; +#ifdef HAVE_LWIP + int sock_n = 0, sock_nfds = 0; + fd_set sock_readfds, sock_writefds, sock_exceptfds; + struct timeval timeout = { .tv_sec = 0, .tv_usec = 0}; +#endif + +#ifdef LIBC_VERBOSE + static int nb; + static int nbread[NOFILE], nbwrite[NOFILE], nbexcept[NOFILE]; + static s_time_t lastshown; + + nb++; +#endif + +#ifdef HAVE_LWIP + /* first poll network */ + FD_ZERO(&sock_readfds); + FD_ZERO(&sock_writefds); + FD_ZERO(&sock_exceptfds); + for (i = 0; i < nfds; i++) { + if (files[i].type == FTYPE_SOCKET) { + if (FD_ISSET(i, readfds)) { + FD_SET(files[i].socket.fd, &sock_readfds); + sock_nfds = i+1; + } + if (FD_ISSET(i, writefds)) { + 
FD_SET(files[i].socket.fd, &sock_writefds); + sock_nfds = i+1; + } + if (FD_ISSET(i, exceptfds)) { + FD_SET(files[i].socket.fd, &sock_exceptfds); + sock_nfds = i+1; + } + } + } + if (sock_nfds > 0) { + DEBUG("lwip_select("); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("); -> "); + sock_n = lwip_select(sock_nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("\n"); + } +#endif + + /* Then see others as well. */ + for (i = 0; i < nfds; i++) { + switch(files[i].type) { + default: + if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds)) + printk("bogus fd %d in select\n", i); + /* Fallthrough. */ + case FTYPE_CONSOLE: + if (FD_ISSET(i, readfds)) { + if (xencons_ring_avail(files[i].cons.dev)) + n++; + else + FD_CLR(i, readfds); + } + if (FD_ISSET(i, writefds)) + n++; + FD_CLR(i, exceptfds); + break; +#ifdef CONFIG_XENBUS + case FTYPE_XENBUS: + if (FD_ISSET(i, readfds)) { + if (files[i].xenbus.events) + n++; + else + FD_CLR(i, readfds); + } + FD_CLR(i, writefds); + FD_CLR(i, exceptfds); + break; +#endif + case FTYPE_EVTCHN: + case FTYPE_TAP: + case FTYPE_BLK: + case FTYPE_KBD: + case FTYPE_FB: + if (FD_ISSET(i, readfds)) { + if (files[i].read) + n++; + else + FD_CLR(i, readfds); + } + FD_CLR(i, writefds); + FD_CLR(i, exceptfds); + break; +#ifdef HAVE_LWIP + case FTYPE_SOCKET: + if (FD_ISSET(i, readfds)) { + /* Optimize no-network-packet case. */ + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_readfds)) + n++; + else + FD_CLR(i, readfds); + } + if (FD_ISSET(i, writefds)) { + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_writefds)) + n++; + else + FD_CLR(i, writefds); + } + if (FD_ISSET(i, exceptfds)) { + if (sock_n && FD_ISSET(files[i].socket.fd, &sock_exceptfds)) + n++; + else + FD_CLR(i, exceptfds); + } + break; +#endif + } +#ifdef LIBC_VERBOSE + if (FD_ISSET(i, readfds)) + nbread[i]++; + if (FD_ISSET(i, writefds)) + nbwrite[i]++; + if (FD_ISSET(i, exceptfds)) + nbexcept[i]++; +#endif + } +#ifdef LIBC_VERBOSE + if (NOW() > lastshown + 1000000000ull) { + lastshown = NOW(); + printk("%lu MB free, ", num_free_pages() / ((1 << 20) / PAGE_SIZE)); + printk("%d(%d): ", nb, sock_n); + for (i = 0; i < nfds; i++) { + if (nbread[i] || nbwrite[i] || nbexcept[i]) + printk(" %d(%c):", i, file_types[files[i].type]); + if (nbread[i]) + printk(" %dR", nbread[i]); + if (nbwrite[i]) + printk(" %dW", nbwrite[i]); + if (nbexcept[i]) + printk(" %dE", nbexcept[i]); + } + printk("\n"); + memset(nbread, 0, sizeof(nbread)); + memset(nbwrite, 0, sizeof(nbwrite)); + memset(nbexcept, 0, sizeof(nbexcept)); + nb = 0; + } +#endif + return n; +} + +/* The strategy is to + * - announce that we will maybe sleep + * - poll a bit ; if successful, return + * - if timeout, return + * - really sleep (except if somebody woke us in the meanwhile) */ +int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, + struct timeval *timeout) +{ + int n, ret; + fd_set myread, mywrite, myexcept; + struct thread *thread = get_current(); + s_time_t start = NOW(), stop; +#ifdef CONFIG_NETFRONT + DEFINE_WAIT(netfront_w); +#endif + DEFINE_WAIT(event_w); +#ifdef CONFIG_BLKFRONT + DEFINE_WAIT(blkfront_w); +#endif +#ifdef CONFIG_XENBUS + DEFINE_WAIT(xenbus_watch_w); +#endif +#ifdef CONFIG_KBDFRONT + DEFINE_WAIT(kbdfront_w); +#endif + DEFINE_WAIT(console_w); + + assert(thread == main_thread); + + DEBUG("select(%d, ", nfds); + dump_set(nfds, readfds, writefds, exceptfds, 
timeout); + DEBUG(");\n"); + + if (timeout) + stop = start + SECONDS(timeout->tv_sec) + timeout->tv_usec * 1000; + else + /* just make gcc happy */ + stop = start; + + /* Tell people we're going to sleep before looking at what they are + * saying, hence letting them wake us if events happen between here and + * schedule() */ +#ifdef CONFIG_NETFRONT + add_waiter(netfront_w, netfront_queue); +#endif + add_waiter(event_w, event_queue); +#ifdef CONFIG_BLKFRONT + add_waiter(blkfront_w, blkfront_queue); +#endif +#ifdef CONFIG_XENBUS + add_waiter(xenbus_watch_w, xenbus_watch_queue); +#endif +#ifdef CONFIG_KBDFRONT + add_waiter(kbdfront_w, kbdfront_queue); +#endif + add_waiter(console_w, console_queue); + + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + + DEBUG("polling "); + dump_set(nfds, &myread, &mywrite, &myexcept, timeout); + DEBUG("\n"); + n = select_poll(nfds, &myread, &mywrite, &myexcept); + + if (n) { + dump_set(nfds, readfds, writefds, exceptfds, timeout); + if (readfds) + *readfds = myread; + if (writefds) + *writefds = mywrite; + if (exceptfds) + *exceptfds = myexcept; + DEBUG(" -> "); + dump_set(nfds, readfds, writefds, exceptfds, timeout); + DEBUG("\n"); + wake(thread); + ret = n; + goto out; + } + if (timeout && NOW() >= stop) { + if (readfds) + FD_ZERO(readfds); + if (writefds) + FD_ZERO(writefds); + if (exceptfds) + FD_ZERO(exceptfds); + timeout->tv_sec = 0; + timeout->tv_usec = 0; + wake(thread); + ret = 0; + goto out; + } + + if (timeout) + thread->wakeup_time = stop; + schedule(); + + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + + n = select_poll(nfds, &myread, &mywrite, &myexcept); + + if (n) { + if (readfds) + *readfds = myread; + if (writefds) + *writefds = mywrite; + if (exceptfds) + *exceptfds = myexcept; + ret = n; + goto out; + } + errno = EINTR; + ret = -1; + +out: +#ifdef CONFIG_NETFRONT + remove_waiter(netfront_w, netfront_queue); +#endif + remove_waiter(event_w, event_queue); +#ifdef CONFIG_BLKFRONT + remove_waiter(blkfront_w, blkfront_queue); +#endif +#ifdef CONFIG_XENBUS + remove_waiter(xenbus_watch_w, xenbus_watch_queue); +#endif +#ifdef CONFIG_KBDFRONT + remove_waiter(kbdfront_w, kbdfront_queue); +#endif + remove_waiter(console_w, console_queue); + return ret; +} + +/* Wrap around select */ +int poll(struct pollfd _pfd[], nfds_t _nfds, int _timeout) +{ + int n, ret; + int i, fd; + struct timeval _timeo, *timeo = NULL; + fd_set rfds, wfds, efds; + int max_fd = -1; + + DEBUG("poll("); + dump_pollfds(_pfd, _nfds, _timeout); + DEBUG(")\n"); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&efds); + + n = 0; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + _pfd[i].revents = 0; + + /* fd < 0, revents = 0, which is already set */ + if (fd < 0) continue; + + /* fd is invalid, revents = POLLNVAL, increment counter */ + if (fd >= NOFILE || files[fd].type == FTYPE_NONE) { + n++; + _pfd[i].revents |= POLLNVAL; + continue; + } + + /* normal case, map POLL* into readfds and writefds: + * POLLIN -> readfds + * POLLOUT -> writefds + * POLL* -> none + */ + if (_pfd[i].events & POLLIN) + FD_SET(fd, &rfds); + if (_pfd[i].events & POLLOUT) + FD_SET(fd, &wfds); + /* always set exceptfds */ + FD_SET(fd, &efds); + if (fd > max_fd) + max_fd = fd; + } + + /* should never sleep 
when we already have events */ + if (n) { + _timeo.tv_sec = 0; + _timeo.tv_usec = 0; + timeo = &_timeo; + } else if (_timeout >= 0) { + /* normal case, construct _timeout, might sleep */ + _timeo.tv_sec = _timeout / 1000; + _timeo.tv_usec = (_timeout % 1000) * 1000; + timeo = &_timeo; + } else { + /* _timeout < 0, block forever */ + timeo = NULL; + } + + + ret = select(max_fd+1, &rfds, &wfds, &efds, timeo); + /* error in select, just return, errno is set by select() */ + if (ret < 0) + return ret; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + + /* the revents has already been set for all error case */ + if (fd < 0 || fd >= NOFILE || files[fd].type == FTYPE_NONE) + continue; + + if (FD_ISSET(fd, &rfds) || FD_ISSET(fd, &wfds) || FD_ISSET(fd, &efds)) + n++; + if (FD_ISSET(fd, &efds)) { + /* anything bad happens we set POLLERR */ + _pfd[i].revents |= POLLERR; + continue; + } + if (FD_ISSET(fd, &rfds)) + _pfd[i].revents |= POLLIN; + if (FD_ISSET(fd, &wfds)) + _pfd[i].revents |= POLLOUT; + } + + return n; +} + +#ifdef HAVE_LWIP +int socket(int domain, int type, int protocol) +{ + int fd, res; + fd = lwip_socket(domain, type, protocol); + if (fd < 0) + return -1; + res = alloc_fd(FTYPE_SOCKET); + printk("socket -> %d\n", res); + files[res].socket.fd = fd; + return res; +} + +int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + int fd, res; + if (files[s].type != FTYPE_SOCKET) { + printk("accept(%d): Bad descriptor\n", s); + errno = EBADF; + return -1; + } + fd = lwip_accept(files[s].socket.fd, addr, addrlen); + if (fd < 0) + return -1; + res = alloc_fd(FTYPE_SOCKET); + files[res].socket.fd = fd; + printk("accepted on %d -> %d\n", s, res); + return res; +} + +#define LWIP_STUB(ret, name, proto, args) \ +ret name proto \ +{ \ + if (files[s].type != FTYPE_SOCKET) { \ + printk(#name "(%d): Bad descriptor\n", s); \ + errno = EBADF; \ + return -1; \ + } \ + s = files[s].socket.fd; \ + return lwip_##name args; \ +} + +LWIP_STUB(int, bind, (int s, struct sockaddr *my_addr, socklen_t addrlen), (s, my_addr, addrlen)) +LWIP_STUB(int, getsockopt, (int s, int level, int optname, void *optval, socklen_t *optlen), (s, level, optname, optval, optlen)) +LWIP_STUB(int, setsockopt, (int s, int level, int optname, void *optval, socklen_t optlen), (s, level, optname, optval, optlen)) +LWIP_STUB(int, connect, (int s, struct sockaddr *serv_addr, socklen_t addrlen), (s, serv_addr, addrlen)) +LWIP_STUB(int, listen, (int s, int backlog), (s, backlog)); +LWIP_STUB(ssize_t, recv, (int s, void *buf, size_t len, int flags), (s, buf, len, flags)) +LWIP_STUB(ssize_t, recvfrom, (int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen), (s, buf, len, flags, from, fromlen)) +LWIP_STUB(ssize_t, send, (int s, void *buf, size_t len, int flags), (s, buf, len, flags)) +LWIP_STUB(ssize_t, sendto, (int s, void *buf, size_t len, int flags, struct sockaddr *to, socklen_t tolen), (s, buf, len, flags, to, tolen)) +LWIP_STUB(int, getsockname, (int s, struct sockaddr *name, socklen_t *namelen), (s, name, namelen)) +#endif + +static char *syslog_ident; +void openlog(const char *ident, int option, int facility) +{ + free(syslog_ident); + syslog_ident = strdup(ident); +} + +void vsyslog(int priority, const char *format, va_list ap) +{ + printk("%s: ", syslog_ident); + print(0, format, ap); +} + +void syslog(int priority, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vsyslog(priority, format, ap); + va_end(ap); +} + +void closelog(void) +{ + free(syslog_ident); + syslog_ident = NULL; +} + +void vwarn(const char *format, va_list ap) +{ + int the_errno = errno; + printk("stubdom: "); + if (format) { + print(0, format, ap); + printk(", "); + } + printk("%s", strerror(the_errno)); +} + +void warn(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vwarn(format, ap); + va_end(ap); +} + +void verr(int eval, const char *format, va_list ap) +{ + vwarn(format, ap); + exit(eval); +} + +void err(int eval, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + verr(eval, format, ap); + va_end(ap); +} + +void vwarnx(const char *format, va_list ap) +{ + printk("stubdom: "); + if (format) + print(0, format, ap); +} + +void warnx(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vwarnx(format, ap); + va_end(ap); +} + +void verrx(int eval, const char *format, va_list ap) +{ + vwarnx(format, ap); + exit(eval); +} + +void errx(int eval, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + verrx(eval, format, ap); + va_end(ap); +} + +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + s_time_t start = NOW(); + s_time_t stop = start + SECONDS(req->tv_sec) + req->tv_nsec; + s_time_t stopped; + struct thread *thread = get_current(); + + thread->wakeup_time = stop; + clear_runnable(thread); + schedule(); + stopped = NOW(); + + if (rem) + { + s_time_t remaining = stop - stopped; + if (remaining > 0) + { + rem->tv_nsec = remaining % 1000000000ULL; + rem->tv_sec = remaining / 1000000000ULL; + } else memset(rem, 0, sizeof(*rem)); + } + + return 0; +} + +int usleep(useconds_t usec) +{ + /* "usec shall be less than one million." 
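*/

/* Aside (editor's sketch, not from the patch): usleep() below relies on the
 * POSIX limit quoted above and leaves tv_sec at 0, so usec >= 1000000 would
 * produce an out-of-range tv_nsec. A hypothetical tolerant variant splits
 * the value first: */

#include <time.h>

static int usleep_any(unsigned long usec)
{
    struct timespec req;

    req.tv_sec  = usec / 1000000;          /* whole seconds */
    req.tv_nsec = (usec % 1000000) * 1000; /* remainder, as nanoseconds */
    return nanosleep(&req, NULL);
}

/* usleep() continues: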
*/ + struct timespec req; + req.tv_nsec = usec * 1000; + req.tv_sec = 0; + + if (nanosleep(&req, NULL)) + return -1; + + return 0; +} + +unsigned int sleep(unsigned int seconds) +{ + struct timespec req, rem; + req.tv_sec = seconds; + req.tv_nsec = 0; + + if (nanosleep(&req, &rem)) + return -1; + + if (rem.tv_nsec > 0) + rem.tv_sec++; + + return rem.tv_sec; +} + +int clock_gettime(clockid_t clk_id, struct timespec *tp) +{ + switch (clk_id) { + case CLOCK_MONOTONIC: + { + struct timeval tv; + + gettimeofday(&tv, NULL); + + tp->tv_sec = tv.tv_sec; + tp->tv_nsec = tv.tv_usec * 1000; + + break; + } + case CLOCK_REALTIME: + { + uint64_t nsec = monotonic_clock(); + + tp->tv_sec = nsec / 1000000000ULL; + tp->tv_nsec = nsec % 1000000000ULL; + + break; + } + default: + print_unsupported("clock_gettime(%ld)", (long) clk_id); + errno = EINVAL; + return -1; + } + + return 0; +} + +uid_t getuid(void) +{ + return 0; +} + +uid_t geteuid(void) +{ + return 0; +} + +gid_t getgid(void) +{ + return 0; +} + +gid_t getegid(void) +{ + return 0; +} + +int gethostname(char *name, size_t namelen) +{ + strncpy(name, "mini-os", namelen); + return 0; +} + +size_t getpagesize(void) +{ + return PAGE_SIZE; +} + +void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE; + + ASSERT(!start); + ASSERT(prot == (PROT_READ|PROT_WRITE)); + ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON))) + || (fd != -1 && flags == MAP_SHARED)); + + if (fd == -1) + return map_zero(n, 1); +#ifdef CONFIG_XC + else if (files[fd].type == FTYPE_XC) { + unsigned long zero = 0; + return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, NULL, 0); + } +#endif + else if (files[fd].type == FTYPE_MEM) { + unsigned long first_mfn = offset >> PAGE_SHIFT; + return map_frames_ex(&first_mfn, n, 0, 1, 1, DOMID_IO, NULL, _PAGE_PRESENT|_PAGE_RW); + } else ASSERT(0); +} + +int munmap(void *start, size_t length) +{ + int total = length / PAGE_SIZE; + int ret; + + ret = unmap_frames((unsigned long)start, (unsigned long)total); + if (ret) { + errno = ret; + return -1; + } + return 0; +} + +void sparse(unsigned long data, size_t size) +{ + unsigned long newdata; + xen_pfn_t *mfns; + int i, n; + + newdata = (data + PAGE_SIZE - 1) & PAGE_MASK; + if (newdata - data > size) + return; + size -= newdata - data; + data = newdata; + n = size / PAGE_SIZE; + size = n * PAGE_SIZE; + + mfns = malloc(n * sizeof(*mfns)); + for (i = 0; i < n; i++) { +#ifdef LIBC_DEBUG + int j; + for (j=0; j<PAGE_SIZE; j++) + ASSERT(((char*)data)[i*PAGE_SIZE+j] == 0); +#endif + mfns[i] = virtual_to_mfn(data + i*PAGE_SIZE); + } + + printk("sparsing %ldMB at %lx\n", size >> 20, data); + + munmap((void *) data, size); + free_physical_pages(mfns, n); + do_map_zero(data, n); +} + +int nice(int inc) +{ + printk("nice() stub called with inc=%d\n", inc); + return 0; +} + + +/* Not supported by FS yet. */ +unsupported_function_crash(link); +unsupported_function(int, readlink, -1); +unsupported_function_crash(umask); + +/* We could support that. */ +unsupported_function_log(int, chdir, -1); + +/* No dynamic library support. */ +unsupported_function_log(void *, dlopen, NULL); +unsupported_function_log(void *, dlsym, NULL); +unsupported_function_log(char *, dlerror, NULL); +unsupported_function_log(int, dlclose, -1); + +/* We don't raise signals anyway. */
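/* Aside (editor's sketch, not from the patch): the unsupported_function*
 * macros defined earlier rely on the GCC asm-label extension: the stub is
 * defined under a private name while its assembler symbol keeps the libc
 * name that the linker resolves. Hand-expanded for a hypothetical symbol
 * "frobnicate": */

#include <errno.h>

int __unsup_frobnicate(void) asm("frobnicate");
int __unsup_frobnicate(void)
{
    errno = ENOSYS; /* the _log variant would also printk() a warning first */
    return -1;
}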
*/ +unsupported_function(int, sigemptyset, -1); +unsupported_function(int, sigfillset, -1); +unsupported_function(int, sigaddset, -1); +unsupported_function(int, sigdelset, -1); +unsupported_function(int, sigismember, -1); +unsupported_function(int, sigprocmask, -1); +unsupported_function(int, sigaction, -1); +unsupported_function(int, __sigsetjmp, 0); +unsupported_function(int, sigaltstack, -1); +unsupported_function_crash(kill); + +/* Unsupported */ +unsupported_function_crash(pipe); +unsupported_function_crash(fork); +unsupported_function_crash(execv); +unsupported_function_crash(execve); +unsupported_function_crash(waitpid); +unsupported_function_crash(wait); +unsupported_function_crash(lockf); +unsupported_function_crash(sysconf); +unsupported_function(int, tcsetattr, -1); +unsupported_function(int, tcgetattr, 0); +unsupported_function(int, grantpt, -1); +unsupported_function(int, unlockpt, -1); +unsupported_function(char *, ptsname, NULL); + +/* net/if.h */ +unsupported_function_log(unsigned int, if_nametoindex, -1); +unsupported_function_log(char *, if_indextoname, (char *) NULL); +unsupported_function_log(struct if_nameindex *, if_nameindex, (struct if_nameindex *) NULL); +unsupported_function_crash(if_freenameindex); + +/* Linuxish abi for the Caml runtime, don't support + Log, and return an error code if possible. If it is not possible + to inform the application of an error, then crash instead! +*/ +unsupported_function_log(struct dirent *, readdir64, NULL); +unsupported_function_log(int, getrusage, -1); +unsupported_function_log(int, getrlimit, -1); +unsupported_function_log(int, getrlimit64, -1); +unsupported_function_log(int, __xstat64, -1); +unsupported_function_log(long, __strtol_internal, LONG_MIN); +unsupported_function_log(double, __strtod_internal, HUGE_VAL); +unsupported_function_log(int, utime, -1); +unsupported_function_log(int, truncate64, -1); +unsupported_function_log(int, tcflow, -1); +unsupported_function_log(int, tcflush, -1); +unsupported_function_log(int, tcdrain, -1); +unsupported_function_log(int, tcsendbreak, -1); +unsupported_function_log(int, cfsetospeed, -1); +unsupported_function_log(int, cfsetispeed, -1); +unsupported_function_crash(cfgetospeed); +unsupported_function_crash(cfgetispeed); +unsupported_function_log(int, symlink, -1); +unsupported_function_log(const char*, inet_ntop, NULL); +unsupported_function_crash(__fxstat64); +unsupported_function_crash(__lxstat64); +unsupported_function_log(int, socketpair, -1); +unsupported_function_crash(sigsuspend); +unsupported_function_log(int, sigpending, -1); +unsupported_function_log(int, shutdown, -1); +unsupported_function_log(int, setuid, -1); +unsupported_function_log(int, setgid, -1); +unsupported_function_crash(rewinddir); +unsupported_function_log(int, getpriority, -1); +unsupported_function_log(int, setpriority, -1); +unsupported_function_log(int, mkfifo, -1); +unsupported_function_log(int, getitimer, -1); +unsupported_function_log(int, setitimer, -1); +unsupported_function_log(void *, getservbyport, NULL); +unsupported_function_log(void *, getservbyname, NULL); +unsupported_function_log(void *, getpwuid, NULL); +unsupported_function_log(void *, getpwnam, NULL); +unsupported_function_log(void *, getprotobynumber, NULL); +unsupported_function_log(void *, getprotobyname, NULL); +unsupported_function_log(int, getpeername, -1); +unsupported_function_log(int, getnameinfo, -1); +unsupported_function_log(char *, getlogin, NULL); +unsupported_function_crash(__h_errno_location); 
+unsupported_function_log(int, gethostbyname_r, -1); +unsupported_function_log(int, gethostbyaddr_r, -1); +unsupported_function_log(int, getgroups, -1); +unsupported_function_log(void *, getgrgid, NULL); +unsupported_function_log(void *, getgrnam, NULL); +unsupported_function_log(int, getaddrinfo, -1); +unsupported_function_log(int, freeaddrinfo, -1); +unsupported_function_log(int, ftruncate64, -1); +unsupported_function_log(int, fchown, -1); +unsupported_function_log(int, fchmod, -1); +unsupported_function_crash(execvp); +unsupported_function_log(int, dup, -1) +unsupported_function_log(int, chroot, -1) +unsupported_function_log(int, chown, -1); +unsupported_function_log(int, chmod, -1); +unsupported_function_crash(alarm); +unsupported_function_log(int, inet_pton, -1); +unsupported_function_log(int, access, -1); +#endif diff -Nru xen-4.6.0/extras/mini-os/lib/xmalloc.c xen-4.6.5/extras/mini-os/lib/xmalloc.c --- xen-4.6.0/extras/mini-os/lib/xmalloc.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/xmalloc.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,319 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: xmaloc.c + * Author: Grzegorz Milos (gm281@cam.ac.uk) + * Samuel Thibault (samuel.thibault@eu.citrix.com) + * Changes: + * + * Date: Aug 2005 + * Jan 2008 + * + * Environment: Xen Minimal OS + * Description: simple memory allocator + * + **************************************************************************** + * Simple allocator for Mini-os. If larger than a page, simply use the + * page-order allocator. + * + * Copy of the allocator for Xen by Rusty Russell: + * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_LIBC +/* static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED; */ + +struct xmalloc_hdr +{ + /* Total including this hdr, unused padding and second hdr. */ + size_t size; + MINIOS_TAILQ_ENTRY(struct xmalloc_hdr) freelist; +} __cacheline_aligned; + +static MINIOS_TAILQ_HEAD(,struct xmalloc_hdr) freelist = + MINIOS_TAILQ_HEAD_INITIALIZER(freelist); + +/* Unused padding data between the two hdrs. */ + +struct xmalloc_pad +{ + /* Size including both hdrs. */ + size_t hdr_size; +}; + +/* Return size, increased to alignment with align. 
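*/

/* Aside (editor's sketch, not from the patch): the rounding below is only
 * correct when align is a power of two, so that align - 1 is a mask of the
 * low bits; adding it first rounds up, and masking clears the residue.
 * Self-checking hosted-C illustration: */

#include <assert.h>
#include <stddef.h>

static size_t align_up(size_t size, size_t align)
{
    return (size + align - 1) & ~(align - 1);
}

int main(void)
{
    assert(align_up(13, 8) == 16);       /* rounded up to the next multiple */
    assert(align_up(16, 8) == 16);       /* already aligned: unchanged */
    assert(align_up(1, 4096) == 4096);   /* the page-rounding case */
    return 0;
}

/* align_up() as used by the allocator: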
*/ +static inline size_t align_up(size_t size, size_t align) +{ + return (size + align - 1) & ~(align - 1); +} + +static void maybe_split(struct xmalloc_hdr *hdr, size_t size, size_t block) +{ + struct xmalloc_hdr *extra; + size_t leftover; + size = align_up(size, __alignof__(struct xmalloc_hdr)); + size = align_up(size, __alignof__(struct xmalloc_pad)); + leftover = block - size; + + /* If enough is left to make a block, put it on free list. */ + if ( leftover >= (2 * (sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad))) ) + { + extra = (struct xmalloc_hdr *)((unsigned long)hdr + size); + extra->size = leftover; + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_INSERT_HEAD(&freelist, extra, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + } + else + { + size = block; + } + + hdr->size = size; +} + +static struct xmalloc_hdr *xmalloc_new_page(size_t size) +{ + struct xmalloc_hdr *hdr; + /* unsigned long flags; */ + + hdr = (struct xmalloc_hdr *)alloc_page(); + if ( hdr == NULL ) + return NULL; + + maybe_split(hdr, size, PAGE_SIZE); + + return hdr; +} + +/* Big object? Just use the page allocator. */ +static void *xmalloc_whole_pages(size_t size, size_t align) +{ + struct xmalloc_hdr *hdr; + struct xmalloc_pad *pad; + unsigned int pageorder; + void *ret; + /* Room for headers */ + size_t hdr_size = sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad); + /* Align for actual beginning of data */ + hdr_size = align_up(hdr_size, align); + + pageorder = get_order(hdr_size + size); + + hdr = (struct xmalloc_hdr *)alloc_pages(pageorder); + if ( hdr == NULL ) + return NULL; + + hdr->size = (1UL << (pageorder + PAGE_SHIFT)); + + ret = (char*)hdr + hdr_size; + pad = (struct xmalloc_pad *) ret - 1; + pad->hdr_size = hdr_size; + return ret; +} + +void *_xmalloc(size_t size, size_t align) +{ + struct xmalloc_hdr *i, *tmp, *hdr = NULL; + uintptr_t data_begin; + size_t hdr_size; + /* unsigned long flags; */ + + hdr_size = sizeof(struct xmalloc_hdr) + sizeof(struct xmalloc_pad); + /* Align on headers requirements. */ + align = align_up(align, __alignof__(struct xmalloc_hdr)); + align = align_up(align, __alignof__(struct xmalloc_pad)); + + /* For big allocs, give them whole pages. */ + if ( size + align_up(hdr_size, align) >= PAGE_SIZE ) + return xmalloc_whole_pages(size, align); + + /* Search free list. */ + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_FOREACH_SAFE(i, &freelist, freelist, tmp) + { + data_begin = align_up((uintptr_t)i + hdr_size, align); + + if ( data_begin + size > (uintptr_t)i + i->size ) + continue; + + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + + uintptr_t size_before = (data_begin - hdr_size) - (uintptr_t)i; + + if (size_before >= 2 * hdr_size) { + /* Worth splitting the beginning */ + struct xmalloc_hdr *new_i = (void*)(data_begin - hdr_size); + new_i->size = i->size - size_before; + i->size = size_before; + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_INSERT_HEAD(&freelist, i, freelist); + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + i = new_i; + } + maybe_split(i, (data_begin + size) - (uintptr_t)i, i->size); + hdr = i; + break; + } + + if (!hdr) { + /* spin_unlock_irqrestore(&freelist_lock, flags); */ + + /* Alloc a new page and return from that. 
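*/

/* Aside (editor's sketch, not from the patch): whichever path produced the
 * block, the invariant is the same -- an xmalloc_hdr at the start, padding
 * up to the requested alignment, and an xmalloc_pad word sitting just below
 * the pointer handed out, recording the distance back to the header:
 *
 *     hdr | ... padding ... | pad | user data
 *     |<----- pad->hdr_size ----->|
 *
 * xfree() and realloc() below recover the header exactly like this helper
 * (struct types as defined earlier in this file): */

static struct xmalloc_hdr *hdr_of(void *p)
{
    struct xmalloc_pad *pad = (struct xmalloc_pad *)p - 1;

    return (struct xmalloc_hdr *)((char *)p - pad->hdr_size);
}

/* _xmalloc() continues -- the new-page fallback: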
*/ + hdr = xmalloc_new_page(align_up(hdr_size, align) + size); + if ( hdr == NULL ) + return NULL; + data_begin = (uintptr_t)hdr + align_up(hdr_size, align); + } + + struct xmalloc_pad *pad = (struct xmalloc_pad *) data_begin - 1; + pad->hdr_size = data_begin - (uintptr_t)hdr; + BUG_ON(data_begin % align); + return (void*)data_begin; +} + +void xfree(const void *p) +{ + /* unsigned long flags; */ + struct xmalloc_hdr *i, *tmp, *hdr; + struct xmalloc_pad *pad; + + if ( p == NULL ) + return; + + pad = (struct xmalloc_pad *)p - 1; + hdr = (struct xmalloc_hdr *)((char *)p - pad->hdr_size); + + /* Big allocs free directly. */ + if ( hdr->size >= PAGE_SIZE ) + { + free_pages(hdr, get_order(hdr->size)); + return; + } + + /* We know hdr will be on same page. */ + if(((long)p & PAGE_MASK) != ((long)hdr & PAGE_MASK)) + { + printk("Header should be on the same page\n"); + *(int*)0=0; + } + + /* Merge with other free block, or put in list. */ + /* spin_lock_irqsave(&freelist_lock, flags); */ + MINIOS_TAILQ_FOREACH_SAFE(i, &freelist, freelist, tmp) + { + unsigned long _i = (unsigned long)i; + unsigned long _hdr = (unsigned long)hdr; + + /* Do not merge across page boundaries. */ + if ( ((_i ^ _hdr) & PAGE_MASK) != 0 ) + continue; + + /* We follow this block? Swallow it. */ + if ( (_i + i->size) == _hdr ) + { + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + i->size += hdr->size; + hdr = i; + } + + /* We precede this block? Swallow it. */ + if ( (_hdr + hdr->size) == _i ) + { + MINIOS_TAILQ_REMOVE(&freelist, i, freelist); + hdr->size += i->size; + } + } + + /* Did we merge an entire page? */ + if ( hdr->size == PAGE_SIZE ) + { + if((((unsigned long)hdr) & (PAGE_SIZE-1)) != 0) + { + printk("Bug\n"); + *(int*)0=0; + } + free_page(hdr); + } + else + { + MINIOS_TAILQ_INSERT_HEAD(&freelist, hdr, freelist); + } + + /* spin_unlock_irqrestore(&freelist_lock, flags); */ +} + +void *malloc(size_t size) +{ + return _xmalloc(size, DEFAULT_ALIGN); +} + +void *realloc(void *ptr, size_t size) +{ + void *new; + struct xmalloc_hdr *hdr; + struct xmalloc_pad *pad; + size_t old_data_size; + + if (ptr == NULL) + return _xmalloc(size, DEFAULT_ALIGN); + + pad = (struct xmalloc_pad *)ptr - 1; + hdr = (struct xmalloc_hdr *)((char*)ptr - pad->hdr_size); + + old_data_size = hdr->size - pad->hdr_size; + if ( old_data_size >= size ) + { + maybe_split(hdr, pad->hdr_size + size, hdr->size); + return ptr; + } + + new = _xmalloc(size, DEFAULT_ALIGN); + if (new == NULL) + return NULL; + + memcpy(new, ptr, old_data_size); + xfree(ptr); + + return new; +} + +void free(void *ptr) +{ + xfree(ptr); +} +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/lib/xs.c xen-4.6.5/extras/mini-os/lib/xs.c --- xen-4.6.0/extras/mini-os/lib/xs.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lib/xs.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,194 @@ +/* + * libxs-compatible layer + * + * Samuel Thibault , 2007-2008 + * + * Mere wrapper around xenbus_* + */ + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include + +static inline int _xs_fileno(struct xs_handle *h) { + return (intptr_t) h; +} + +struct xs_handle *xs_daemon_open() +{ + int fd = alloc_fd(FTYPE_XENBUS); + files[fd].xenbus.events = NULL; + printk("xs_daemon_open -> %d, %p\n", fd, &files[fd].xenbus.events); + return (void*)(intptr_t) fd; +} + +void xs_daemon_close(struct xs_handle *h) +{ + int fd = _xs_fileno(h); + 
struct xenbus_event *event, *next; + for (event = files[fd].xenbus.events; event; event = next) + { + next = event->next; + free(event); + } + files[fd].type = FTYPE_NONE; +} + +int xs_fileno(struct xs_handle *h) +{ + return _xs_fileno(h); +} + +void *xs_read(struct xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + char *value; + char *msg; + + msg = xenbus_read(t, path, &value); + if (msg) { + printk("xs_read(%s): %s\n", path, msg); + free(msg); + return NULL; + } + + if (len) + *len = strlen(value); + return value; +} + +bool xs_write(struct xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len) +{ + char value[len + 1]; + char *msg; + + memcpy(value, data, len); + value[len] = 0; + + msg = xenbus_write(t, path, value); + if (msg) { + printk("xs_write(%s): %s\n", path, msg); + free(msg); + return false; + } + return true; +} + +static bool xs_bool(char *reply) +{ + if (!reply) + return true; + free(reply); + return false; +} + +bool xs_rm(struct xs_handle *h, xs_transaction_t t, const char *path) +{ + return xs_bool(xenbus_rm(t, path)); +} + +static void *xs_talkv(struct xs_handle *h, xs_transaction_t t, + enum xsd_sockmsg_type type, + struct write_req *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg *msg; + void *ret; + + msg = xenbus_msg_reply(type, t, iovec, num_vecs); + ret = malloc(msg->len); + memcpy(ret, (char*) msg + sizeof(*msg), msg->len); + if (len) + *len = msg->len - 1; + free(msg); + return ret; +} + +static void *xs_single(struct xs_handle *h, xs_transaction_t t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct write_req iovec; + + iovec.data = (void *)string; + iovec.len = strlen(string) + 1; + + return xs_talkv(h, t, type, &iovec, 1, len); +} + +char *xs_get_domain_path(struct xs_handle *h, unsigned int domid) +{ + char domid_str[MAX_STRLEN(domid)]; + + sprintf(domid_str, "%u", domid); + + return xs_single(h, XBT_NULL, XS_GET_DOMAIN_PATH, domid_str, NULL); +} + +char **xs_directory(struct xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num) +{ + char *msg; + char **entries, **res; + char *entry; + int i, n; + int size; + + msg = xenbus_ls(t, path, &res); + if (msg) { + printk("xs_directory(%s): %s\n", path, msg); + free(msg); + return NULL; + } + + size = 0; + for (n = 0; res[n]; n++) + size += strlen(res[n]) + 1; + + entries = malloc(n * sizeof(char *) + size); + entry = (char *) (&entries[n]); + + for (i = 0; i < n; i++) { + int l = strlen(res[i]) + 1; + memcpy(entry, res[i], l); + free(res[i]); + entries[i] = entry; + entry += l; + } + + *num = n; + free(res); + return entries; +} + +bool xs_watch(struct xs_handle *h, const char *path, const char *token) +{ + int fd = _xs_fileno(h); + printk("xs_watch(%s, %s)\n", path, token); + return xs_bool(xenbus_watch_path_token(XBT_NULL, path, token, &files[fd].xenbus.events)); +} + +char **xs_read_watch(struct xs_handle *h, unsigned int *num) +{ + int fd = _xs_fileno(h); + struct xenbus_event *event; + event = files[fd].xenbus.events; + files[fd].xenbus.events = event->next; + printk("xs_read_watch() -> %s %s\n", event->path, event->token); + *num = 2; + return (char **) &event->path; +} + +bool xs_unwatch(struct xs_handle *h, const char *path, const char *token) +{ + printk("xs_unwatch(%s, %s)\n", path, token); + return xs_bool(xenbus_unwatch_path_token(XBT_NULL, path, token)); +} +#endif diff -Nru xen-4.6.0/extras/mini-os/lock.c xen-4.6.5/extras/mini-os/lock.c --- 
xen-4.6.0/extras/mini-os/lock.c	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/lock.c	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,112 @@
+/*
+ * locks for newlib
+ *
+ * Samuel Thibault , July 2008
+ */
+
+#ifdef HAVE_LIBC
+
+#include
+#include
+#include
+#include
+
+int ___lock_init(_LOCK_T *lock)
+{
+    lock->busy = 0;
+    init_waitqueue_head(&lock->wait);
+    return 0;
+}
+
+int ___lock_acquire(_LOCK_T *lock)
+{
+    unsigned long flags;
+    while(1) {
+        wait_event(lock->wait, !lock->busy);
+        local_irq_save(flags);
+        if (!lock->busy)
+            break;
+        local_irq_restore(flags);
+    }
+    lock->busy = 1;
+    local_irq_restore(flags);
+    return 0;
+}
+
+int ___lock_try_acquire(_LOCK_T *lock)
+{
+    unsigned long flags;
+    int ret = -1;
+    local_irq_save(flags);
+    if (!lock->busy) {
+        lock->busy = 1;
+        ret = 0;
+    }
+    local_irq_restore(flags);
+    return ret;
+}
+
+int ___lock_release(_LOCK_T *lock)
+{
+    unsigned long flags;
+    local_irq_save(flags);
+    lock->busy = 0;
+    wake_up(&lock->wait);
+    local_irq_restore(flags);
+    return 0;
+}
+
+
+int ___lock_init_recursive(_LOCK_RECURSIVE_T *lock)
+{
+    lock->owner = NULL;
+    init_waitqueue_head(&lock->wait);
+    return 0;
+}
+
+int ___lock_acquire_recursive(_LOCK_RECURSIVE_T *lock)
+{
+    unsigned long flags;
+    if (lock->owner != get_current()) {
+        while (1) {
+            wait_event(lock->wait, lock->owner == NULL);
+            local_irq_save(flags);
+            if (lock->owner == NULL)
+                break;
+            local_irq_restore(flags);
+        }
+        lock->owner = get_current();
+        local_irq_restore(flags);
+    }
+    lock->count++;
+    return 0;
+}
+
+int ___lock_try_acquire_recursive(_LOCK_RECURSIVE_T *lock)
+{
+    unsigned long flags;
+    int ret = -1;
+    local_irq_save(flags);
+    if (!lock->owner) {
+        ret = 0;
+        lock->owner = get_current();
+        lock->count++;
+    }
+    local_irq_restore(flags);
+    return ret;
+}
+
+int ___lock_release_recursive(_LOCK_RECURSIVE_T *lock)
+{
+    unsigned long flags;
+    BUG_ON(lock->owner != get_current());
+    if (--lock->count)
+        return 0;
+    local_irq_save(flags);
+    lock->owner = NULL;
+    wake_up(&lock->wait);
+    local_irq_restore(flags);
+    return 0;
+}
+
+#endif
diff -Nru xen-4.6.0/extras/mini-os/lwip-arch.c xen-4.6.5/extras/mini-os/lwip-arch.c
--- xen-4.6.0/extras/mini-os/lwip-arch.c	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/lwip-arch.c	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,294 @@
+/*
+ * lwip-arch.c
+ *
+ * Arch-specific semaphores and mailboxes for lwIP running on mini-os
+ *
+ * Tim Deegan , July 2007
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Is called to initialize the sys_arch layer */
+void sys_init(void)
+{
+}
+
+/* Creates and returns a new semaphore. The "count" argument specifies
+ * the initial state of the semaphore. */
+sys_sem_t sys_sem_new(uint8_t count)
+{
+    struct semaphore *sem = xmalloc(struct semaphore);
+    sem->count = count;
+    init_waitqueue_head(&sem->wait);
+    return sem;
+}
+
+/* Deallocates a semaphore. */
+void sys_sem_free(sys_sem_t sem)
+{
+    xfree(sem);
+}
+
+/* Signals a semaphore. */
+void sys_sem_signal(sys_sem_t sem)
+{
+    up(sem);
+}
+
+/* Blocks the thread while waiting for the semaphore to be
+ * signaled. If the "timeout" argument is non-zero, the thread should
+ * only be blocked for the specified time (measured in
+ * milliseconds).
+ *
+ * If the timeout argument is non-zero, the return value is the number of
+ * milliseconds spent waiting for the semaphore to be signaled. If the
+ * semaphore wasn't signaled within the specified time, the return value is
+ * SYS_ARCH_TIMEOUT.
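 *
 * A minimal caller sketch, assuming a semaphore created empty with
 * sys_sem_new(0):
 *
 *   sys_sem_t s = sys_sem_new(0);
 *   uint32_t waited = sys_arch_sem_wait(s, 100);   // block for up to 100 ms
 *   if (waited == SYS_ARCH_TIMEOUT)
 *       ;  // nothing signalled the semaphore within 100 ms
 *   else
 *       ;  // signalled; 'waited' is the blocking time in milliseconds
 *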
If the thread didn't have to wait for the semaphore + * (i.e., it was already signaled), the function may return zero. */ +uint32_t sys_arch_sem_wait(sys_sem_t sem, uint32_t timeout) +{ + /* Slightly more complicated than the normal minios semaphore: + * need to wake on timeout *or* signal */ + sys_prot_t prot; + int64_t then = NOW(); + int64_t deadline; + + if (timeout == 0) + deadline = 0; + else + deadline = then + MILLISECS(timeout); + + while(1) { + wait_event_deadline(sem->wait, (sem->count > 0), deadline); + + prot = sys_arch_protect(); + /* Atomically check that we can proceed */ + if (sem->count > 0 || (deadline && NOW() >= deadline)) + break; + sys_arch_unprotect(prot); + } + + if (sem->count > 0) { + sem->count--; + sys_arch_unprotect(prot); + return NSEC_TO_MSEC(NOW() - then); + } + + sys_arch_unprotect(prot); + return SYS_ARCH_TIMEOUT; +} + +/* Creates an empty mailbox. */ +sys_mbox_t sys_mbox_new(int size) +{ + struct mbox *mbox = xmalloc(struct mbox); + if (!size) + size = 32; + else if (size == 1) + size = 2; + mbox->count = size; + mbox->messages = xmalloc_array(void*, size); + init_SEMAPHORE(&mbox->read_sem, 0); + mbox->reader = 0; + init_SEMAPHORE(&mbox->write_sem, size); + mbox->writer = 0; + return mbox; +} + +/* Deallocates a mailbox. If there are messages still present in the + * mailbox when the mailbox is deallocated, it is an indication of a + * programming error in lwIP and the developer should be notified. */ +void sys_mbox_free(sys_mbox_t mbox) +{ + ASSERT(mbox->reader == mbox->writer); + xfree(mbox->messages); + xfree(mbox); +} + +/* Posts the "msg" to the mailbox, internal version that actually does the + * post. */ +static void do_mbox_post(sys_mbox_t mbox, void *msg) +{ + /* The caller got a semaphore token, so we are now allowed to increment + * writer, but we still need to prevent concurrency between writers + * (interrupt handler vs main) */ + sys_prot_t prot = sys_arch_protect(); + mbox->messages[mbox->writer] = msg; + mbox->writer = (mbox->writer + 1) % mbox->count; + ASSERT(mbox->reader != mbox->writer); + sys_arch_unprotect(prot); + up(&mbox->read_sem); +} + +/* Posts the "msg" to the mailbox. */ +void sys_mbox_post(sys_mbox_t mbox, void *msg) +{ + if (mbox == SYS_MBOX_NULL) + return; + down(&mbox->write_sem); + do_mbox_post(mbox, msg); +} + +/* Try to post the "msg" to the mailbox. */ +err_t sys_mbox_trypost(sys_mbox_t mbox, void *msg) +{ + if (mbox == SYS_MBOX_NULL) + return ERR_BUF; + if (!trydown(&mbox->write_sem)) + return ERR_MEM; + do_mbox_post(mbox, msg); + return ERR_OK; +} + +/* + * Fetch a message from a mailbox. Internal version that actually does the + * fetch. + */ +static void do_mbox_fetch(sys_mbox_t mbox, void **msg) +{ + sys_prot_t prot; + /* The caller got a semaphore token, so we are now allowed to increment + * reader, but we may still need to prevent concurrency between readers. + * FIXME: can there be concurrent readers? */ + prot = sys_arch_protect(); + ASSERT(mbox->reader != mbox->writer); + if (msg != NULL) + *msg = mbox->messages[mbox->reader]; + mbox->reader = (mbox->reader + 1) % mbox->count; + sys_arch_unprotect(prot); + up(&mbox->write_sem); +} + +/* Blocks the thread until a message arrives in the mailbox, but does + * not block the thread longer than "timeout" milliseconds (similar to + * the sys_arch_sem_wait() function). The "msg" argument is a result + * parameter that is set by the function (i.e., by doing "*msg = + * ptr"). The "msg" parameter maybe NULL to indicate that the message + * should be dropped. 
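 *
 * A minimal producer/consumer sketch, assuming a mailbox created with
 * sys_mbox_new(0) (0 selects the default depth of 32 chosen above) and a
 * hypothetical payload pointer some_ptr:
 *
 *   sys_mbox_t mb = sys_mbox_new(0);
 *   sys_mbox_post(mb, some_ptr);        // may block while the ring is full
 *   void *msg;
 *   if (sys_arch_mbox_fetch(mb, &msg, 10) != SYS_ARCH_TIMEOUT)
 *       ;  // msg now holds some_ptr
 *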
+ *
+ * The return values are the same as for the sys_arch_sem_wait() function:
+ * Number of milliseconds spent waiting or SYS_ARCH_TIMEOUT if there was a
+ * timeout. */
+uint32_t sys_arch_mbox_fetch(sys_mbox_t mbox, void **msg, uint32_t timeout)
+{
+    uint32_t rv;
+    if (mbox == SYS_MBOX_NULL)
+        return SYS_ARCH_TIMEOUT;
+
+    rv = sys_arch_sem_wait(&mbox->read_sem, timeout);
+    if ( rv == SYS_ARCH_TIMEOUT )
+        return rv;
+
+    do_mbox_fetch(mbox, msg);
+    return 0;
+}
+
+/* This is similar to sys_arch_mbox_fetch, however if a message is not
+ * present in the mailbox, it immediately returns with the code
+ * SYS_MBOX_EMPTY. On success 0 is returned.
+ *
+ * To allow for efficient implementations, this can be defined as a
+ * function-like macro in sys_arch.h instead of a normal function. For
+ * example, a naive implementation could be:
+ *   #define sys_arch_mbox_tryfetch(mbox,msg) \
+ *     sys_arch_mbox_fetch(mbox,msg,1)
+ * although this would introduce unnecessary delays. */
+
+uint32_t sys_arch_mbox_tryfetch(sys_mbox_t mbox, void **msg) {
+    if (mbox == SYS_MBOX_NULL)
+        return SYS_ARCH_TIMEOUT;
+
+    if (!trydown(&mbox->read_sem))
+        return SYS_MBOX_EMPTY;
+
+    do_mbox_fetch(mbox, msg);
+    return 0;
+}
+
+
+/* Returns a pointer to the per-thread sys_timeouts structure. In lwIP,
+ * each thread has a list of timeouts which is represented as a linked
+ * list of sys_timeout structures. The sys_timeouts structure holds a
+ * pointer to a linked list of timeouts. This function is called by
+ * the lwIP timeout scheduler and must not return a NULL value.
+ *
+ * In a single-threaded sys_arch implementation, this function will
+ * simply return a pointer to a global sys_timeouts variable stored in
+ * the sys_arch module. */
+struct sys_timeouts *sys_arch_timeouts(void)
+{
+    static struct sys_timeouts timeout;
+    return &timeout;
+}
+
+
+/* Starts a new thread with priority "prio" that will begin its execution in the
+ * function "thread()". The "arg" argument will be passed as an argument to the
+ * thread() function. The id of the new thread is returned. Both the id and
+ * the priority are system dependent. */
+static struct thread *lwip_thread;
+sys_thread_t sys_thread_new(char *name, void (* thread)(void *arg), void *arg, int stacksize, int prio)
+{
+    struct thread *t;
+    if (stacksize > STACK_SIZE) {
+        printk("Can't start lwIP thread: stack size %d is too large for our %lu\n",
+               stacksize, (unsigned long) STACK_SIZE);
+        do_exit();
+    }
+    lwip_thread = t = create_thread(name, thread, arg);
+    return t;
+}
+
+/* This optional function does a "fast" critical region protection and returns
+ * the previous protection level. This function is only called during very short
+ * critical regions. An embedded system which supports ISR-based drivers might
+ * want to implement this function by disabling interrupts. Task-based systems
+ * might want to implement this by using a mutex or disabling tasking. This
+ * function should support recursive calls from the same task or interrupt. In
+ * other words, sys_arch_protect() could be called while already protected. In
+ * that case the return value indicates that it is already protected.
+ *
+ * sys_arch_protect() is only required if your port is supporting an operating
+ * system. */
+sys_prot_t sys_arch_protect(void)
+{
+    unsigned long flags;
+    local_irq_save(flags);
+    return flags;
+}
+
+/* This optional function does a "fast" set of critical region protection to the
+ * value specified by pval. See the documentation for sys_arch_protect() for
+ * more information.
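 *
 * Since protection here is just the saved interrupt state, recursion works
 * by pairing each protect with an unprotect in reverse order, e.g.:
 *
 *   sys_prot_t outer = sys_arch_protect();   // IRQs off, previous flags saved
 *   sys_prot_t inner = sys_arch_protect();   // nested call is fine
 *   sys_arch_unprotect(inner);               // still protected by 'outer'
 *   sys_arch_unprotect(outer);               // original IRQ state restored
 *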
This function is only required if your port is supporting + * an operating system. */ +void sys_arch_unprotect(sys_prot_t pval) +{ + local_irq_restore(pval); +} + +/* non-fatal, print a message. */ +void lwip_printk(char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + printk("lwIP: "); + print(0, fmt, args); + va_end(args); +} + +/* fatal, print message and abandon execution. */ +void lwip_die(char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + printk("lwIP assertion failed: "); + print(0, fmt, args); + va_end(args); + printk("\n"); + BUG(); +} diff -Nru xen-4.6.0/extras/mini-os/lwip-net.c xen-4.6.5/extras/mini-os/lwip-net.c --- xen-4.6.0/extras/mini-os/lwip-net.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/lwip-net.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,386 @@ +/* + * lwip-net.c + * + * interface between lwIP's ethernet and Mini-os's netfront. + * For now, support only one network interface, as mini-os does. + * + * Tim Deegan , July 2007 + * based on lwIP's ethernetif.c skeleton file, copyrights as below. + */ + + +/* + * Copyright (c) 2001-2004 Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + * This file is part of the lwIP TCP/IP stack. + * + * Author: Adam Dunkels + * + */ + +#include + +#include "lwip/opt.h" +#include "lwip/def.h" +#include "lwip/mem.h" +#include "lwip/pbuf.h" +#include "lwip/sys.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netif/etharp.h" + +#include + +/* Define those to better describe your network interface. */ +#define IFNAME0 'e' +#define IFNAME1 'n' + +#define IF_IPADDR 0x00000000 +#define IF_NETMASK 0x00000000 + +/* Only have one network interface at a time. */ +static struct netif *the_interface = NULL; + +static unsigned char rawmac[6]; +static struct netfront_dev *dev; + +/* Forward declarations. */ +static err_t netfront_output(struct netif *netif, struct pbuf *p, + struct ip_addr *ipaddr); + +/* + * low_level_output(): + * + * Should do the actual transmission of the packet. 
The packet is + * contained in the pbuf that is passed to the function. This pbuf + * might be chained. + * + */ + +static err_t +low_level_output(struct netif *netif, struct pbuf *p) +{ + if (!dev) + return ERR_OK; + +#ifdef ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + + /* Send the data from the pbuf to the interface, one pbuf at a + time. The size of the data in each pbuf is kept in the ->len + variable. */ + if (!p->next) { + /* Only one fragment, can send it directly */ + netfront_xmit(dev, p->payload, p->len); + } else { + unsigned char data[p->tot_len], *cur; + struct pbuf *q; + + for(q = p, cur = data; q != NULL; cur += q->len, q = q->next) + memcpy(cur, q->payload, q->len); + netfront_xmit(dev, data, p->tot_len); + } + +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + + LINK_STATS_INC(link.xmit); + + return ERR_OK; +} + + + +/* + * netfront_output(): + * + * This function is called by the TCP/IP stack when an IP packet + * should be sent. It calls the function called low_level_output() to + * do the actual transmission of the packet. + * + */ + +static err_t +netfront_output(struct netif *netif, struct pbuf *p, + struct ip_addr *ipaddr) +{ + + /* resolve hardware address, then send (or queue) packet */ + return etharp_output(netif, p, ipaddr); + +} + +/* + * netfront_input(): + * + * This function should be called when a packet is ready to be read + * from the interface. + * + */ + +static void +netfront_input(struct netif *netif, unsigned char* data, int len) +{ + struct eth_hdr *ethhdr; + struct pbuf *p, *q; + +#if ETH_PAD_SIZE + len += ETH_PAD_SIZE; /* allow room for Ethernet padding */ +#endif + + /* move received packet into a new pbuf */ + p = pbuf_alloc(PBUF_RAW, len, PBUF_POOL); + if (p == NULL) { + LINK_STATS_INC(link.memerr); + LINK_STATS_INC(link.drop); + return; + } + +#if ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + + /* We iterate over the pbuf chain until we have read the entire + * packet into the pbuf. */ + for(q = p; q != NULL && len > 0; q = q->next) { + /* Read enough bytes to fill this pbuf in the chain. The + * available data in the pbuf is given by the q->len + * variable. */ + memcpy(q->payload, data, len < q->len ? len : q->len); + data += q->len; + len -= q->len; + } + +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + + LINK_STATS_INC(link.recv); + + /* points to packet payload, which starts with an Ethernet header */ + ethhdr = p->payload; + + switch (htons(ethhdr->type)) { + /* IP packet? */ + case ETHTYPE_IP: +#if 0 +/* CSi disabled ARP table update on ingress IP packets. + This seems to work but needs thorough testing. */ + /* update ARP table */ + etharp_ip_input(netif, p); +#endif + /* skip Ethernet header */ + pbuf_header(p, -(int16_t)sizeof(struct eth_hdr)); + /* pass to network layer */ + if (tcpip_input(p, netif) == ERR_MEM) + /* Could not store it, drop */ + pbuf_free(p); + break; + + case ETHTYPE_ARP: + /* pass p to ARP module */ + etharp_arp_input(netif, (struct eth_addr *) netif->hwaddr, p); + break; + + default: + pbuf_free(p); + p = NULL; + break; + } +} + + +/* + * netif_rx(): overrides the default netif_rx behaviour in the netfront driver. + * + * Pull received packets into a pbuf queue for the low_level_input() + * function to pass up to lwIP. 
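 *
 * netfront.c defines its default netif_rx() with __attribute__((weak)), so
 * providing this function replaces it at link time. An application that
 * bypasses lwIP could hook packets the same way; a sketch with a
 * hypothetical handler:
 *
 *   void netif_rx(unsigned char *data, int len)
 *   {
 *       my_packet_handler(data, len);   // my_packet_handler is illustrative
 *   }
 *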
+ */ + +void netif_rx(unsigned char* data, int len) +{ + if (the_interface != NULL) { + netfront_input(the_interface, data, len); + wake_up(&netfront_queue); + } + /* By returning, we ack the packet and relinquish the RX ring slot */ +} + +/* + * Set the IP, mask and gateway of the IF + */ +void networking_set_addr(struct ip_addr *ipaddr, struct ip_addr *netmask, struct ip_addr *gw) +{ + netif_set_ipaddr(the_interface, ipaddr); + netif_set_netmask(the_interface, netmask); + netif_set_gw(the_interface, gw); +} + + +static void +arp_timer(void *arg) +{ + etharp_tmr(); + sys_timeout(ARP_TMR_INTERVAL, arp_timer, NULL); +} + +/* + * netif_netfront_init(): + * + * Should be called at the beginning of the program to set up the + * network interface. It calls the function low_level_init() to do the + * actual setup of the hardware. + * + */ + +err_t +netif_netfront_init(struct netif *netif) +{ + unsigned char *mac = netif->state; + +#if LWIP_SNMP + /* ifType ethernetCsmacd(6) @see RFC1213 */ + netif->link_type = 6; + /* your link speed here */ + netif->link_speed = ; + netif->ts = 0; + netif->ifinoctets = 0; + netif->ifinucastpkts = 0; + netif->ifinnucastpkts = 0; + netif->ifindiscards = 0; + netif->ifoutoctets = 0; + netif->ifoutucastpkts = 0; + netif->ifoutnucastpkts = 0; + netif->ifoutdiscards = 0; +#endif + + netif->name[0] = IFNAME0; + netif->name[1] = IFNAME1; + netif->output = netfront_output; + netif->linkoutput = low_level_output; + + the_interface = netif; + + /* set MAC hardware address */ + netif->hwaddr_len = 6; + netif->hwaddr[0] = mac[0]; + netif->hwaddr[1] = mac[1]; + netif->hwaddr[2] = mac[2]; + netif->hwaddr[3] = mac[3]; + netif->hwaddr[4] = mac[4]; + netif->hwaddr[5] = mac[5]; + + /* No interesting per-interface state */ + netif->state = NULL; + + /* maximum transfer unit */ + netif->mtu = 1500; + + /* broadcast capability */ + netif->flags = NETIF_FLAG_BROADCAST; + + etharp_init(); + + sys_timeout(ARP_TMR_INTERVAL, arp_timer, NULL); + + return ERR_OK; +} + +/* + * Thread run by netfront: bring up the IP address and fire lwIP timers. + */ +static __DECLARE_SEMAPHORE_GENERIC(tcpip_is_up, 0); +static void tcpip_bringup_finished(void *p) +{ + tprintk("TCP/IP bringup ends.\n"); + up(&tcpip_is_up); +} + +/* + * Utility function to bring the whole lot up. Call this from app_main() + * or similar -- it starts netfront and have lwIP start its thread, + * which calls back to tcpip_bringup_finished(), which + * lets us know it's OK to continue. 
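 *
 * A minimal bring-up sketch from application code, assuming HAVE_LWIP and
 * CONFIG_NETFRONT are both enabled:
 *
 *   start_networking();    // blocks on tcpip_is_up until lwIP is running
 *   // ... use the lwIP netconn/socket APIs here ...
 *   stop_networking();     // shuts the netfront device down again
 *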
+ */ +void start_networking(void) +{ + struct netif *netif; + struct ip_addr ipaddr = { htonl(IF_IPADDR) }; + struct ip_addr netmask = { htonl(IF_NETMASK) }; + struct ip_addr gw = { 0 }; + char *ip = NULL; + + tprintk("Waiting for network.\n"); + + dev = init_netfront(NULL, NULL, rawmac, &ip); + + if (ip) { + ipaddr.addr = inet_addr(ip); + if (IN_CLASSA(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSB_NET); + else if (IN_CLASSC(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSC_NET); + else + tprintk("Strange IP %s, leaving netmask to 0.\n", ip); + } + tprintk("IP %x netmask %x gateway %x.\n", + ntohl(ipaddr.addr), ntohl(netmask.addr), ntohl(gw.addr)); + + tprintk("TCP/IP bringup begins.\n"); + + netif = xmalloc(struct netif); + tcpip_init(tcpip_bringup_finished, netif); + + netif_add(netif, &ipaddr, &netmask, &gw, rawmac, + netif_netfront_init, ip_input); + netif_set_default(netif); + netif_set_up(netif); + + down(&tcpip_is_up); + + tprintk("Network is ready.\n"); +} + +/* Shut down the network */ +void stop_networking(void) +{ + if (dev) + shutdown_netfront(dev); +} diff -Nru xen-4.6.0/extras/mini-os/main.c xen-4.6.5/extras/mini-os/main.c --- xen-4.6.0/extras/mini-os/main.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/main.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,193 @@ +/* + * POSIX-compatible main layer + * + * Samuel Thibault , October 2007 + */ + +#ifdef HAVE_LIBC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int main(int argc, char *argv[], char *envp[]); +extern void __libc_init_array(void); +extern void __libc_fini_array(void); +extern unsigned long __CTOR_LIST__[]; +extern unsigned long __DTOR_LIST__[]; + +#if 0 +#include +int main(int argc, char *argv[], char *envp[]) +{ + printf("Hello, World!\n"); + return 1; +} +#endif + +void _init(void) +{ +} + +void _fini(void) +{ +} + +extern char __app_bss_start, __app_bss_end; +static void call_main(void *p) +{ + char *c, quote; +#ifdef CONFIG_QEMU_XS_ARGS + char *domargs, *msg; +#endif + int argc; + char **argv; + char *envp[] = { NULL }; +#ifdef CONFIG_QEMU_XS_ARGS + char *vm; + char path[128]; + int domid; +#endif + int i; + + /* Let other parts initialize (including console output) before maybe + * crashing. 
*/ + //sleep(1); + +#ifdef CONFIG_SPARSE_BSS + sparse((unsigned long) &__app_bss_start, &__app_bss_end - &__app_bss_start); +#endif +#if defined(HAVE_LWIP) && defined(CONFIG_START_NETWORK) && defined(CONFIG_NETFRONT) + start_networking(); +#endif +#ifdef CONFIG_PCIFRONT + create_thread("pcifront", pcifront_watches, NULL); +#endif + +#ifdef CONFIG_QEMU_XS_ARGS + /* Fetch argc, argv from XenStore */ + domid = xenbus_read_integer("target"); + if (domid == -1) { + printk("Couldn't read target\n"); + do_exit(); + } + + snprintf(path, sizeof(path), "/local/domain/%d/vm", domid); + msg = xenbus_read(XBT_NIL, path, &vm); + if (msg) { + printk("Couldn't read vm path\n"); + do_exit(); + } + printk("dom vm is at %s\n", vm); + + snprintf(path, sizeof(path), "%s/image/dmargs", vm); + free(vm); + msg = xenbus_read(XBT_NIL, path, &domargs); + + if (msg) { + printk("Couldn't get stubdom args: %s\n", msg); + domargs = strdup(""); + } +#endif + + argc = 1; + +#define PARSE_ARGS(ARGS,START,QUOTE,END) \ + c = ARGS; \ + quote = 0; \ + while (*c) { \ + if (*c != ' ') { \ + START; \ + while (*c) { \ + if (quote) { \ + if (*c == quote) { \ + quote = 0; \ + QUOTE; \ + continue; \ + } \ + } else if (*c == ' ') \ + break; \ + if (*c == '"' || *c == '\'') { \ + quote = *c; \ + QUOTE; \ + continue; \ + } \ + c++; \ + } \ + } else { \ + END; \ + while (*c == ' ') \ + c++; \ + } \ + } \ + if (quote) {\ + printk("Warning: unterminated quotation %c\n", quote); \ + quote = 0; \ + } +#define PARSE_ARGS_COUNT(ARGS) PARSE_ARGS(ARGS, argc++, c++, ) +#define PARSE_ARGS_STORE(ARGS) PARSE_ARGS(ARGS, argv[argc++] = c, memmove(c, c + 1, strlen(c + 1) + 1), *c++ = 0) + + PARSE_ARGS_COUNT((char*)start_info.cmd_line); +#ifdef CONFIG_QEMU_XS_ARGS + PARSE_ARGS_COUNT(domargs); +#endif + + argv = alloca((argc + 1) * sizeof(char *)); + argv[0] = "main"; + argc = 1; + + PARSE_ARGS_STORE((char*)start_info.cmd_line) +#ifdef CONFIG_QEMU_XS_ARGS + PARSE_ARGS_STORE(domargs) +#endif + + argv[argc] = NULL; + + for (i = 0; i < argc; i++) + printf("\"%s\" ", argv[i]); + printf("\n"); + + __libc_init_array(); + environ = envp; + for (i = 0; __CTOR_LIST__[i] != 0; i++) + ((void((*)(void)))__CTOR_LIST__[i]) (); + tzset(); + + exit(main(argc, argv, envp)); +} + +void _exit(int ret) +{ + int i; + + for (i = 0; __DTOR_LIST__[i] != 0; i++) + ((void((*)(void)))__DTOR_LIST__[i]) (); + close_all_files(); + __libc_fini_array(); + printk("main returned %d\n", ret); +#if defined(HAVE_LWIP) && defined(CONFIG_NETFRONT) + stop_networking(); +#endif + stop_kernel(); + if (!ret) { + /* No problem, just shutdown. */ + struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_poweroff }; + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + } + do_exit(); +} + +int app_main(start_info_t *si) +{ + printk("main.c: dummy main: start_info=%p\n", si); + main_thread = create_thread("main", call_main, si); + return 0; +} +#endif diff -Nru xen-4.6.0/extras/mini-os/Makefile xen-4.6.5/extras/mini-os/Makefile --- xen-4.6.0/extras/mini-os/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/Makefile 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,224 @@ +# Common Makefile for mini-os. +# +# Every architecture directory below mini-os/arch has to have a +# Makefile and a arch.mk. 
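#
# Each CONFIG_* switch below is turned into a -DCONFIG_* compiler flag via
# flags-y, so C code selects optional drivers with plain preprocessor
# guards. A sketch of the consuming side (illustrative application code):
#
#   #ifdef CONFIG_NETFRONT
#       dev = init_netfront(NULL, NULL, rawmac, &ip);
#   #endif
#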
+# + +OBJ_DIR=$(CURDIR) +TOPLEVEL_DIR=$(CURDIR) + +ifeq ($(MINIOS_CONFIG),) +include Config.mk +else +EXTRA_DEPS += $(MINIOS_CONFIG) +include $(MINIOS_CONFIG) +endif + +include $(MINIOS_ROOT)/config/MiniOS.mk + +# Configuration defaults +CONFIG_START_NETWORK ?= y +CONFIG_SPARSE_BSS ?= y +CONFIG_QEMU_XS_ARGS ?= n +CONFIG_TEST ?= n +CONFIG_PCIFRONT ?= n +CONFIG_BLKFRONT ?= y +CONFIG_TPMFRONT ?= n +CONFIG_TPM_TIS ?= n +CONFIG_TPMBACK ?= n +CONFIG_NETFRONT ?= y +CONFIG_FBFRONT ?= y +CONFIG_KBDFRONT ?= y +CONFIG_CONSFRONT ?= y +CONFIG_XENBUS ?= y +CONFIG_XC ?=y +CONFIG_LWIP ?= $(lwip) + +# Export config items as compiler directives +flags-$(CONFIG_START_NETWORK) += -DCONFIG_START_NETWORK +flags-$(CONFIG_SPARSE_BSS) += -DCONFIG_SPARSE_BSS +flags-$(CONFIG_QEMU_XS_ARGS) += -DCONFIG_QEMU_XS_ARGS +flags-$(CONFIG_PCIFRONT) += -DCONFIG_PCIFRONT +flags-$(CONFIG_BLKFRONT) += -DCONFIG_BLKFRONT +flags-$(CONFIG_TPMFRONT) += -DCONFIG_TPMFRONT +flags-$(CONFIG_TPM_TIS) += -DCONFIG_TPM_TIS +flags-$(CONFIG_TPMBACK) += -DCONFIG_TPMBACK +flags-$(CONFIG_NETFRONT) += -DCONFIG_NETFRONT +flags-$(CONFIG_KBDFRONT) += -DCONFIG_KBDFRONT +flags-$(CONFIG_FBFRONT) += -DCONFIG_FBFRONT +flags-$(CONFIG_CONSFRONT) += -DCONFIG_CONSFRONT +flags-$(CONFIG_XENBUS) += -DCONFIG_XENBUS + +DEF_CFLAGS += $(flags-y) + +# Symlinks and headers that must be created before building the C files +GENERATED_HEADERS := include/list.h $(ARCH_LINKS) include/mini-os include/$(TARGET_ARCH_FAM)/mini-os + +EXTRA_DEPS += $(GENERATED_HEADERS) + +# Include common mini-os makerules. +include minios.mk + +# Set tester flags +# CFLAGS += -DBLKTEST_WRITE + +# Define some default flags for linking. +LDLIBS := +APP_LDLIBS := +LDARCHLIB := -L$(OBJ_DIR)/$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME) +LDFLAGS_FINAL := -T $(TARGET_ARCH_DIR)/minios-$(MINIOS_TARGET_ARCH).lds + +# Prefix for global API names. All other symbols are localised before +# linking with EXTRA_OBJS. +GLOBAL_PREFIX := xenos_ +EXTRA_OBJS = + +TARGET := mini-os + +# Subdirectories common to mini-os +SUBDIRS := lib xenbus console + +src-$(CONFIG_BLKFRONT) += blkfront.c +src-$(CONFIG_TPMFRONT) += tpmfront.c +src-$(CONFIG_TPM_TIS) += tpm_tis.c +src-$(CONFIG_TPMBACK) += tpmback.c +src-y += daytime.c +src-y += events.c +src-$(CONFIG_FBFRONT) += fbfront.c +src-y += gntmap.c +src-y += gnttab.c +src-y += hypervisor.c +src-y += kernel.c +src-y += lock.c +src-y += main.c +src-y += mm.c +src-$(CONFIG_NETFRONT) += netfront.c +src-$(CONFIG_PCIFRONT) += pcifront.c +src-y += sched.c +src-$(CONFIG_TEST) += test.c + +src-y += lib/ctype.c +src-y += lib/math.c +src-y += lib/printf.c +src-y += lib/stack_chk_fail.c +src-y += lib/string.c +src-y += lib/sys.c +src-y += lib/xmalloc.c +src-$(CONFIG_XENBUS) += lib/xs.c + +src-$(CONFIG_XENBUS) += xenbus/xenbus.c + +src-y += console/console.c +src-y += console/xencons_ring.c +src-$(CONFIG_CONSFRONT) += console/xenbus.c + +# The common mini-os objects to build. +APP_OBJS := +OBJS := $(patsubst %.c,$(OBJ_DIR)/%.o,$(src-y)) + +.PHONY: default +default: $(OBJ_DIR)/$(TARGET) + +# Create special architecture specific links. The function arch_links +# has to be defined in arch.mk (see include above). +ifneq ($(ARCH_LINKS),) +$(ARCH_LINKS): + $(arch_links) +endif + +include/list.h: include/minios-external/bsd-sys-queue-h-seddery include/minios-external/bsd-sys-queue.h + perl $^ --prefix=minios >$@.new + $(call move-if-changed,$@.new,$@) + +# Used by stubdom's Makefile +.PHONY: links +links: $(GENERATED_HEADERS) + +include/mini-os: + ln -sf . 
$@ + +include/$(TARGET_ARCH_FAM)/mini-os: + ln -sf . $@ + +.PHONY: arch_lib +arch_lib: + $(MAKE) --directory=$(TARGET_ARCH_DIR) OBJ_DIR=$(OBJ_DIR)/$(TARGET_ARCH_DIR) || exit 1; + +ifeq ($(CONFIG_LWIP),y) +# lwIP library +LWC := $(sort $(shell find $(LWIPDIR)/src -type f -name '*.c')) +LWC := $(filter-out %6.c %ip6_addr.c %ethernetif.c, $(LWC)) +LWO := $(patsubst %.c,%.o,$(LWC)) +LWO += $(OBJ_DIR)/lwip-arch.o +ifeq ($(CONFIG_NETFRONT),y) +LWO += $(OBJ_DIR)/lwip-net.o +endif + +$(OBJ_DIR)/lwip.a: $(LWO) + $(RM) $@ + $(AR) cqs $@ $^ + +OBJS += $(OBJ_DIR)/lwip.a +endif + +OBJS := $(filter-out $(OBJ_DIR)/lwip%.o $(LWO), $(OBJS)) + +ifeq ($(libc),y) +ifeq ($(CONFIG_XC),y) +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH) -whole-archive -lxenguest -lxenctrl -no-whole-archive +endif +APP_LDLIBS += -lpci +APP_LDLIBS += -lz +APP_LDLIBS += -lm +LDLIBS += -lc +endif + +ifneq ($(APP_OBJS)-$(lwip),-y) +OBJS := $(filter-out $(OBJ_DIR)/daytime.o, $(OBJS)) +endif + +$(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds + $(LD) -r -d $(LDFLAGS) -\( $^ -\) $(APP_LDLIBS) --undefined main -o $@ + +ifneq ($(APP_OBJS),) +APP_O=$(OBJ_DIR)/$(TARGET)_app.o +endif + +$(OBJ_DIR)/$(TARGET): $(OBJS) $(APP_O) arch_lib + $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o + $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o + $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ + gzip -f -9 -c $@ >$@.gz + +.PHONY: clean arch_clean + +arch_clean: + $(MAKE) --directory=$(TARGET_ARCH_DIR) OBJ_DIR=$(OBJ_DIR)/$(TARGET_ARCH_DIR) clean || exit 1; + +clean: arch_clean + for dir in $(addprefix $(OBJ_DIR)/,$(SUBDIRS)); do \ + rm -f $$dir/*.o; \ + done + rm -f include/list.h + rm -f $(OBJ_DIR)/*.o *~ $(OBJ_DIR)/core $(OBJ_DIR)/$(TARGET).elf $(OBJ_DIR)/$(TARGET).raw $(OBJ_DIR)/$(TARGET) $(OBJ_DIR)/$(TARGET).gz + find . $(OBJ_DIR) -type l | xargs rm -f + $(RM) $(OBJ_DIR)/lwip.a $(LWO) + rm -f tags TAGS + + +define all_sources + ( find . -follow -name SCCS -prune -o -name '*.[chS]' -print ) +endef + +.PHONY: cscope +cscope: + $(all_sources) > cscope.files + cscope -k -b -q + +.PHONY: tags +tags: + $(all_sources) | xargs ctags + +.PHONY: TAGS +TAGS: + $(all_sources) | xargs etags diff -Nru xen-4.6.0/extras/mini-os/minios.mk xen-4.6.5/extras/mini-os/minios.mk --- xen-4.6.0/extras/mini-os/minios.mk 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/minios.mk 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,76 @@ +# +# The file contains the common make rules for building mini-os. +# + +debug = y + +# Define some default flags. +# NB. '-Wcast-qual' is nasty, so I omitted it. +DEF_CFLAGS += -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format -Wno-redundant-decls -Wformat +DEF_CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +DEF_CFLAGS += $(call cc-option,$(CC),-fgnu89-inline) +DEF_CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline +DEF_CPPFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) + +DEF_ASFLAGS += -D__ASSEMBLY__ +DEF_LDFLAGS += + +ifeq ($(debug),y) +DEF_CFLAGS += -g +#DEF_CFLAGS += -DMM_DEBUG +#DEF_CFLAGS += -DFS_DEBUG +#DEF_CFLAGS += -DLIBC_DEBUG +#DEF_CFLAGS += -DGNT_DEBUG +#DEF_CFLAGS += -DGNTMAP_DEBUG +else +DEF_CFLAGS += -O3 +endif + +# Make the headers define our internal stuff +DEF_CFLAGS += -D__INSIDE_MINIOS__ + +# Build the CFLAGS and ASFLAGS for compiling and assembling. +# DEF_... flags are the common mini-os flags, +# ARCH_... 
flags may be defined in arch/$(TARGET_ARCH_FAM)/rules.mk
+CFLAGS := $(DEF_CFLAGS) $(ARCH_CFLAGS)
+CPPFLAGS := $(DEF_CPPFLAGS) $(ARCH_CPPFLAGS)
+ASFLAGS := $(DEF_ASFLAGS) $(ARCH_ASFLAGS)
+LDFLAGS := $(DEF_LDFLAGS) $(ARCH_LDFLAGS)
+
+# Special build dependencies.
+# Rebuild all after touching this/these file(s)
+EXTRA_DEPS += $(MINIOS_ROOT)/minios.mk
+EXTRA_DEPS += $(MINIOS_ROOT)/$(TARGET_ARCH_DIR)/arch.mk
+
+# Find all header files for checking dependencies.
+HDRS := $(wildcard $(MINIOS_ROOT)/include/*.h)
+HDRS += $(wildcard $(MINIOS_ROOT)/include/xen/*.h)
+HDRS += $(wildcard $(ARCH_INC)/*.h)
+# Headers from extra wanted header directories.
+extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h))
+HDRS += $(extra_heads)
+
+# Add the special header directories to the include paths.
+override CPPFLAGS := $(CPPFLAGS) $(extra_incl)
+
+# The name of the architecture specific library.
+# This is on x86_32: libx86_32.a
+# $(ARCH_LIB) has to be built in the architecture specific directory.
+ARCH_LIB_NAME = $(MINIOS_TARGET_ARCH)
+ARCH_LIB := lib$(ARCH_LIB_NAME).a
+
+# This object contains the entrypoint for startup from Xen.
+# $(HEAD_ARCH_OBJ) has to be built in the architecture specific directory.
+HEAD_ARCH_OBJ := $(MINIOS_TARGET_ARCH).o
+HEAD_OBJ := $(OBJ_DIR)/$(TARGET_ARCH_DIR)/$(HEAD_ARCH_OBJ)
+
+
+$(OBJ_DIR)/%.o: %.c $(HDRS) Makefile $(EXTRA_DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
+
+$(OBJ_DIR)/%.o: %.S $(HDRS) Makefile $(EXTRA_DEPS)
+	$(CC) $(ASFLAGS) $(CPPFLAGS) -c $< -o $@
diff -Nru xen-4.6.0/extras/mini-os/mm.c xen-4.6.5/extras/mini-os/mm.c
--- xen-4.6.0/extras/mini-os/mm.c	1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/mm.c	2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,445 @@
+/*
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: mm.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes: Grzegorz Milos
+ *
+ * Date: Aug 2003, changes Aug 2005
+ *
+ * Environment: Xen Minimal OS
+ * Description: memory management related functions
+ *              contains buddy page allocator from Xen.
+ *
+ ****************************************************************************
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef MM_DEBUG
+#define DEBUG(_f, _a...) \
+    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...) ((void)0)
+#endif
+
+/*********************
+ * ALLOCATION BITMAP
+ *  One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ *  -(1<<n)  sets all bits >= n.
+ *  (1<<n)-1 sets all bits <  n.
+ * Variable names in map_{alloc,free}:
+ *  *_idx == Index into `alloc_bitmap' array.
+ *  *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
+    }
+    else
+    {
+        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
+        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
+    }
+}
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
+    }
+    else
+    {
+        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
+        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
+    }
+}
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+    chunk_head_t  *next;
+    chunk_head_t **pprev;
+    int            level;
+};
+
+struct chunk_tail_st {
+    int level;
+};
+
+/* Linked lists of free chunks of different powers-of-two in size. */
+#define FREELIST_SIZE ((sizeof(void*)<<3)+1)
+static chunk_head_t *free_head[FREELIST_SIZE];
+static chunk_head_t  free_tail[FREELIST_SIZE];
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)
+
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+#ifdef MM_DEBUG
+/*
+ * Prints allocation[0/1] for @nr_pages, starting at @start
+ * address (virtual).
+ */
+USED static void print_allocation(void *start, int nr_pages)
+{
+    unsigned long pfn_start = virt_to_pfn(start);
+    int count;
+    for(count = 0; count < nr_pages; count++)
+        if(allocated_in_map(pfn_start + count)) printk("1");
+        else printk("0");
+
+    printk("\n");
+}
+
+/*
+ * Prints chunks (marking them with letters) for @nr_pages starting
+ * at @start (virtual).
+ */
+USED static void print_chunks(void *start, int nr_pages)
+{
+    char chunks[1001], current='A';
+    int order, count;
+    chunk_head_t *head;
+    unsigned long pfn_start = virt_to_pfn(start);
+
+    memset(chunks, (int)'_', 1000);
+    if(nr_pages > 1000)
+    {
+        DEBUG("Can only print 1000 pages. Increase buffer size.");
+    }
+
+    for(order=0; order < FREELIST_SIZE; order++)
+    {
+        head = free_head[order];
+        while(!FREELIST_EMPTY(head))
+        {
+            for(count = 0; count < 1UL<< head->level; count++)
+            {
+                if(count + virt_to_pfn(head) - pfn_start < 1000)
+                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
+            }
+            head = head->next;
+            current++;
+        }
+    }
+    chunks[nr_pages] = '\0';
+    printk("%s\n", chunks);
+}
+#endif
+
+
+/*
+ * Initialise allocator, placing addresses [@min,@max] in free pool.
+ * @min and @max are PHYSICAL addresses.
+ */
+static void init_page_allocator(unsigned long min, unsigned long max)
+{
+    int i;
+    unsigned long range, bitmap_size;
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+    for ( i = 0; i < FREELIST_SIZE; i++ )
+    {
+        free_head[i]       = &free_tail[i];
+        free_tail[i].pprev = &free_head[i];
+        free_tail[i].next  = NULL;
+    }
+
+    min = round_pgup (min);
+    max = round_pgdown(max);
+
+    /* Allocate space for the allocation bitmap. */
+    bitmap_size = (max+1) >> (PAGE_SHIFT+3);
+    bitmap_size = round_pgup(bitmap_size);
+    alloc_bitmap = (unsigned long *)to_virt(min);
+    min += bitmap_size;
+    range = max - min;
+
+    /* All allocated by default. */
+    memset(alloc_bitmap, ~0, bitmap_size);
+    /* Free up the memory we've been given to play with. */
+    map_free(PHYS_PFN(min), range>>PAGE_SHIFT);
+
+    /* The buddy lists are addressed in high memory. */
+    min = (unsigned long) to_virt(min);
+    max = (unsigned long) to_virt(max);
+
+    while ( range != 0 )
+    {
+        /*
+         * Next chunk is limited by alignment of min, but also
+         * must not be bigger than remaining range.
+         */
+        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
+            if ( min & (1UL<<i) ) break;
+
+        ch = (chunk_head_t *)min;
+        min   += (1UL<<i);
+        range -= (1UL<<i);
+        ct = (chunk_tail_t *)min-1;
+        i -= PAGE_SHIFT;
+        ch->level       = i;
+        ch->next        = free_head[i];
+        ch->pprev       = &free_head[i];
+        ch->next->pprev = &ch->next;
+        free_head[i]    = ch;
+        ct->level       = i;
+    }
+}
+
+
+/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address.
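 *
 * Callers pass an order, not a byte count: order n yields 2^n contiguous
 * pages, and a block must be freed with the order it was allocated with,
 * as xfree() in lib/xmalloc.c does via free_pages(hdr, get_order(hdr->size)).
 * A short sketch, assuming 4 KiB pages:
 *
 *   unsigned long va = alloc_pages(2);   // 2^2 = 4 pages, 16 KiB; 0 on failure
 *   if (va)
 *       free_pages((void *)va, 2);
 *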
 */
+unsigned long alloc_pages(int order)
+{
+    int i;
+    chunk_head_t *alloc_ch, *spare_ch;
+    chunk_tail_t *spare_ct;
+
+
+    /* Find smallest order which can satisfy the request. */
+    for ( i = order; i < FREELIST_SIZE; i++ ) {
+        if ( !FREELIST_EMPTY(free_head[i]) )
+            break;
+    }
+
+    if ( i == FREELIST_SIZE ) goto no_memory;
+
+    /* Unlink a chunk. */
+    alloc_ch = free_head[i];
+    free_head[i] = alloc_ch->next;
+    alloc_ch->next->pprev = alloc_ch->pprev;
+
+    /* We may have to break the chunk a number of times. */
+    while ( i != order )
+    {
+        /* Split into two equal parts. */
+        i--;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1;
+
+        /* Create new header for spare chunk. */
+        spare_ch->level = i;
+        spare_ch->next = free_head[i];
+        spare_ch->pprev = &free_head[i];
+        spare_ct->level = i;
+
+        /* Link in the spare chunk. */
+        spare_ch->next->pprev = &spare_ch->next;
+        free_head[i] = spare_ch;
+    }
+
+    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order);
+
+    return((unsigned long)alloc_ch);
+
+ no_memory:
+
+    printk("Cannot handle page request order %d!\n", order);
+
+    return 0;
+}
+
+/* Free 2^@order pages starting at VIRTUAL address @pointer. */
+void free_pages(void *pointer, int order)
+{
+    chunk_head_t *freed_ch, *to_merge_ch;
+    chunk_tail_t *freed_ct;
+    unsigned long mask;
+
+    /* First free the chunk */
+    map_free(virt_to_pfn(pointer), 1UL<<order);
+
+    /* Create free chunk */
+    freed_ch = (chunk_head_t *)pointer;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT))) - 1;
+
+    /* Now, possibly we can coalesce chunks together */
+    while(order < FREELIST_SIZE)
+    {
+        mask = 1UL << (order + PAGE_SHIFT);
+        if((unsigned long)freed_ch & mask)
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+               to_merge_ch->level != order)
+                break;
+
+            /* Merge with predecessor */
+            freed_ch = to_merge_ch;
+        }
+        else
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+               to_merge_ch->level != order)
+                break;
+
+            /* Merge with successor */
+            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask) - 1;
+        }
+
+        /* We are committed to merging, unlink the chunk */
+        *(to_merge_ch->pprev) = to_merge_ch->next;
+        to_merge_ch->next->pprev = to_merge_ch->pprev;
+
+        order++;
+    }
+
+    /* Link the new chunk */
+    freed_ch->level = order;
+    freed_ch->next = free_head[order];
+    freed_ch->pprev = &free_head[order];
+    freed_ct->level = order;
+
+    freed_ch->next->pprev = &freed_ch->next;
+    free_head[order] = freed_ch;
+
+}
+
+int free_physical_pages(xen_pfn_t *mfns, int n)
+{
+    struct xen_memory_reservation reservation;
+
+    set_xen_guest_handle(reservation.extent_start, mfns);
+    reservation.nr_extents = n;
+    reservation.extent_order = 0;
+    reservation.domid = DOMID_SELF;
+    return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+
+#ifdef HAVE_LIBC
+void *sbrk(ptrdiff_t increment)
+{
+    unsigned long old_brk = brk;
+    unsigned long new_brk = old_brk + increment;
+
+    if (new_brk > heap_end) {
+        printk("Heap exhausted: %lx + %lx = %p > %p\n",
+               old_brk,
+               (unsigned long) increment,
+               (void *) new_brk,
+               (void *) heap_end);
+        return NULL;
+    }
+
+    if (new_brk > heap_mapped) {
+        unsigned long n = (new_brk - heap_mapped + PAGE_SIZE - 1) / PAGE_SIZE;
+        do_map_zero(heap_mapped, n);
+        heap_mapped += n * PAGE_SIZE;
+    }
+
+    brk = new_brk;
+
+    return (void *) old_brk;
+}
+#endif
+
+
+
+void init_mm(void)
+{
+
+    unsigned long start_pfn, max_pfn;
+
+    printk("MM: Init\n");
+
+    arch_init_mm(&start_pfn, &max_pfn);
+    /*
+     * now we can initialise the page allocator
+     */
+    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
+           (u_long)to_virt(PFN_PHYS(start_pfn)), (u_long)PFN_PHYS(start_pfn),
+           (u_long)to_virt(PFN_PHYS(max_pfn)), (u_long)PFN_PHYS(max_pfn));
+    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
+    printk("MM: done\n");
+
+    arch_init_p2m(max_pfn);
+
+    arch_init_demand_mapping_area(max_pfn);
+}
+
+void fini_mm(void)
+{
+}
+
+void sanity_check(void)
+{
+    int x;
+    chunk_head_t *head;
+
+    for (x = 0; x < FREELIST_SIZE; x++) {
+        for (head = free_head[x]; !FREELIST_EMPTY(head); head = head->next) {
+            ASSERT(!allocated_in_map(virt_to_pfn(head)));
+            if (head->next)
+                ASSERT(head->next->pprev == &head->next);
+        }
+        if
(free_head[x]) { + ASSERT(free_head[x]->pprev == &free_head[x]); + } + } +} diff -Nru xen-4.6.0/extras/mini-os/netfront.c xen-4.6.5/extras/mini-os/netfront.c --- xen-4.6.0/extras/mini-os/netfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/netfront.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,667 @@ +/* Minimal network driver for Mini-OS. + * Copyright (c) 2006-2007 Jacob Gorm Hansen, University of Copenhagen. + * Based on netfront.c from Xen Linux. + * + * Does not handle fragments or extras. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_WAIT_QUEUE_HEAD(netfront_queue); + +#ifdef HAVE_LIBC +#define NETIF_SELECT_RX ((void*)-1) +#endif + + + +#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) +#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) +#define GRANT_INVALID_REF 0 + + +struct net_buffer { + void* page; + grant_ref_t gref; +}; + +struct netfront_dev { + domid_t dom; + + unsigned short tx_freelist[NET_TX_RING_SIZE + 1]; + struct semaphore tx_sem; + + struct net_buffer rx_buffers[NET_RX_RING_SIZE]; + struct net_buffer tx_buffers[NET_TX_RING_SIZE]; + + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; + grant_ref_t tx_ring_ref; + grant_ref_t rx_ring_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + char *mac; + + xenbus_event_queue events; + +#ifdef HAVE_LIBC + int fd; + unsigned char *data; + size_t len; + size_t rlen; +#endif + + void (*netif_rx)(unsigned char* data, int len); +}; + +void init_rx_buffers(struct netfront_dev *dev); + +static inline void add_id_to_freelist(unsigned int id,unsigned short* freelist) +{ + freelist[id + 1] = freelist[0]; + freelist[0] = id; +} + +static inline unsigned short get_id_from_freelist(unsigned short* freelist) +{ + unsigned int id = freelist[0]; + freelist[0] = freelist[id + 1]; + return id; +} + +__attribute__((weak)) void netif_rx(unsigned char* data,int len) +{ + printk("%d bytes incoming at %p\n",len,data); +} + +__attribute__((weak)) void net_app_main(void*si,unsigned char*mac) {} + +static inline int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +void network_rx(struct netfront_dev *dev) +{ + RING_IDX rp,cons,req_prod; + int nr_consumed, more, i, notify; + int dobreak; + + nr_consumed = 0; +moretodo: + rp = dev->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
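 * The read barrier pairs with the backend's write barrier: rsp_prod must be
 * read before any of the response slots it covers, or a stale entry could be
 * consumed. The same two-step pattern recurs throughout this driver:
 *
 *   rp = dev->rx.sring->rsp_prod;   // 1. fetch the producer index
 *   rmb();                          // 2. only then read entries up to rp
 *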
 */
+
+    dobreak = 0;
+    for (cons = dev->rx.rsp_cons; cons != rp && !dobreak; nr_consumed++, cons++)
+    {
+        struct net_buffer* buf;
+        unsigned char* page;
+        int id;
+
+        struct netif_rx_response *rx = RING_GET_RESPONSE(&dev->rx, cons);
+
+        id = rx->id;
+        BUG_ON(id >= NET_RX_RING_SIZE);
+
+        buf = &dev->rx_buffers[id];
+        page = (unsigned char*)buf->page;
+        gnttab_end_access(buf->gref);
+
+        if (rx->status > NETIF_RSP_NULL)
+        {
+#ifdef HAVE_LIBC
+            if (dev->netif_rx == NETIF_SELECT_RX) {
+                int len = rx->status;
+                ASSERT(current == main_thread);
+                if (len > dev->len)
+                    len = dev->len;
+                memcpy(dev->data, page+rx->offset, len);
+                dev->rlen = len;
+                /* No need to receive the rest for now */
+                dobreak = 1;
+            } else
+#endif
+                dev->netif_rx(page+rx->offset,rx->status);
+        }
+    }
+    dev->rx.rsp_cons=cons;
+
+    RING_FINAL_CHECK_FOR_RESPONSES(&dev->rx,more);
+    if(more && !dobreak) goto moretodo;
+
+    req_prod = dev->rx.req_prod_pvt;
+
+    for(i=0; i<nr_consumed; i++)
+    {
+        int id = xennet_rxidx(req_prod + i);
+        netif_rx_request_t *req = RING_GET_REQUEST(&dev->rx, req_prod + i);
+        struct net_buffer* buf = &dev->rx_buffers[id];
+        void* page = buf->page;
+
+        /* We are sure to have free gnttab entries since they got released above */
+        buf->gref = req->gref =
+            gnttab_grant_access(dev->dom,virt_to_mfn(page),0);
+
+        req->id = id;
+    }
+
+    wmb();
+
+    dev->rx.req_prod_pvt = req_prod + i;
+
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->rx, notify);
+    if (notify)
+        notify_remote_via_evtchn(dev->evtchn);
+
+}
+
+void network_tx_buf_gc(struct netfront_dev *dev)
+{
+
+
+    RING_IDX cons, prod;
+    unsigned short id;
+
+    do {
+        prod = dev->tx.sring->rsp_prod;
+        rmb(); /* Ensure we see responses up to 'rp'. */
+
+        for (cons = dev->tx.rsp_cons; cons != prod; cons++)
+        {
+            struct netif_tx_response *txrsp;
+            struct net_buffer *buf;
+
+            txrsp = RING_GET_RESPONSE(&dev->tx, cons);
+            if (txrsp->status == NETIF_RSP_NULL)
+                continue;
+
+            if (txrsp->status == NETIF_RSP_ERROR)
+                printk("packet error\n");
+
+            id = txrsp->id;
+            BUG_ON(id >= NET_TX_RING_SIZE);
+            buf = &dev->tx_buffers[id];
+            gnttab_end_access(buf->gref);
+            buf->gref=GRANT_INVALID_REF;
+
+            add_id_to_freelist(id,dev->tx_freelist);
+            up(&dev->tx_sem);
+        }
+
+        dev->tx.rsp_cons = prod;
+
+        /*
+         * Set a new event, then check for race with update of tx_cons.
+         * Note that it is essential to schedule a callback, no matter
+         * how few tx_buffers are pending. Even if there is space in the
+         * transmit ring, higher layers may be blocked because too much
+         * data is outstanding: in such cases notification from Xen is
+         * likely to be the only kick that we'll get.
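 *
 * Concretely, rsp_event is re-armed halfway between the responses already
 * seen and the requests still outstanding; e.g. with prod == 10 and
 * req_prod == 18 (illustrative values):
 *
 *   rsp_event = 10 + ((18 - 10) >> 1) + 1 == 15;   // wake after ~half drain
 *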
+         */
+        dev->tx.sring->rsp_event =
+            prod + ((dev->tx.sring->req_prod - prod) >> 1) + 1;
+        mb();
+    } while ((cons == prod) && (prod != dev->tx.sring->rsp_prod));
+
+
+}
+
+void netfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
+{
+    int flags;
+    struct netfront_dev *dev = data;
+
+    local_irq_save(flags);
+
+    network_tx_buf_gc(dev);
+    network_rx(dev);
+
+    local_irq_restore(flags);
+}
+
+#ifdef HAVE_LIBC
+void netfront_select_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
+{
+    int flags;
+    struct netfront_dev *dev = data;
+    int fd = dev->fd;
+
+    local_irq_save(flags);
+    network_tx_buf_gc(dev);
+    local_irq_restore(flags);
+
+    if (fd != -1)
+        files[fd].read = 1;
+    wake_up(&netfront_queue);
+}
+#endif
+
+static void free_netfront(struct netfront_dev *dev)
+{
+    int i;
+
+    for(i=0;i<NET_TX_RING_SIZE;i++)
+        down(&dev->tx_sem);
+
+    mask_evtchn(dev->evtchn);
+
+    free(dev->mac);
+    free(dev->backend);
+
+    gnttab_end_access(dev->rx_ring_ref);
+    gnttab_end_access(dev->tx_ring_ref);
+
+    free_page(dev->rx.sring);
+    free_page(dev->tx.sring);
+
+    unbind_evtchn(dev->evtchn);
+
+    for(i=0;i<NET_RX_RING_SIZE;i++) {
+        gnttab_end_access(dev->rx_buffers[i].gref);
+        free_page(dev->rx_buffers[i].page);
+    }
+
+    for(i=0;i<NET_TX_RING_SIZE;i++)
+        if (dev->tx_buffers[i].page)
+            free_page(dev->tx_buffers[i].page);
+
+    free(dev->nodename);
+    free(dev);
+}
+
+struct netfront_dev *init_netfront(char *_nodename, void (*thenetif_rx)(unsigned char* data, int len), unsigned char rawmac[6], char **ip)
+{
+    xenbus_transaction_t xbt;
+    char* err;
+    char* message=NULL;
+    struct netif_tx_sring *txs;
+    struct netif_rx_sring *rxs;
+    int retry=0;
+    int i;
+    char* msg = NULL;
+    char nodename[256];
+    char path[256];
+    struct netfront_dev *dev;
+    static int netfrontends = 0;
+
+    if (!_nodename)
+        snprintf(nodename, sizeof(nodename), "device/vif/%d", netfrontends);
+    else {
+        strncpy(nodename, _nodename, sizeof(nodename) - 1);
+        nodename[sizeof(nodename) - 1] = 0;
+    }
+    netfrontends++;
+
+    if (!thenetif_rx)
+        thenetif_rx = netif_rx;
+
+    printk("************************ NETFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    memset(dev, 0, sizeof(*dev));
+    dev->nodename = strdup(nodename);
+#ifdef HAVE_LIBC
+    dev->fd = -1;
+#endif
+
+    printk("net TX ring size %lu\n", (unsigned long) NET_TX_RING_SIZE);
+    printk("net RX ring size %lu\n", (unsigned long) NET_RX_RING_SIZE);
+    init_SEMAPHORE(&dev->tx_sem, NET_TX_RING_SIZE);
+    for(i=0;i<NET_TX_RING_SIZE;i++)
+    {
+        add_id_to_freelist(i,dev->tx_freelist);
+        dev->tx_buffers[i].page = NULL;
+    }
+
+    for(i=0;i<NET_RX_RING_SIZE;i++)
+    {
+        dev->rx_buffers[i].page = (char*)alloc_page();
+    }
+
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    dev->dom = xenbus_read_integer(path);
+#ifdef HAVE_LIBC
+    if (thenetif_rx == NETIF_SELECT_RX)
+        evtchn_alloc_unbound(dev->dom, netfront_select_handler, dev, &dev->evtchn);
+    else
+#endif
+        evtchn_alloc_unbound(dev->dom, netfront_handler, dev, &dev->evtchn);
+
+    txs = (struct netif_tx_sring *) alloc_page();
+    rxs = (struct netif_rx_sring *) alloc_page();
+    memset(txs,0,PAGE_SIZE);
+    memset(rxs,0,PAGE_SIZE);
+
+
+    SHARED_RING_INIT(txs);
+    SHARED_RING_INIT(rxs);
+    FRONT_RING_INIT(&dev->tx, txs, PAGE_SIZE);
+    FRONT_RING_INIT(&dev->rx, rxs, PAGE_SIZE);
+
+    dev->tx_ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(txs),0);
+    dev->rx_ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(rxs),0);
+
+    init_rx_buffers(dev);
+
+    dev->netif_rx = thenetif_rx;
+
+    dev->events = NULL;
+
+again:
+    err = xenbus_transaction_start(&xbt);
+    if (err) {
+        printk("starting transaction\n");
+        free(err);
+    }
+
+    err = xenbus_printf(xbt, nodename, "tx-ring-ref","%u",
+                        dev->tx_ring_ref);
+    if (err) {
+        message = "writing tx
ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "rx-ring-ref","%u", + dev->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, nodename, "request-rx-copy", "%u", 1); + + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + snprintf(path, sizeof(path), "%s/mac", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->mac); + + if ((dev->backend == NULL) || (dev->mac == NULL)) { + printk("%s: backend/mac failed\n", __func__); + goto error; + } + + printk("backend at %s\n",dev->backend); + printk("mac is %s\n",dev->mac); + + { + XenbusState state; + char path[strlen(dev->backend) + strlen("/state") + 1]; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not avalable, state=%d\n", state); + xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + if (ip) { + snprintf(path, sizeof(path), "%s/ip", dev->backend); + xenbus_read(XBT_NIL, path, ip); + } + } + + printk("**************************\n"); + + unmask_evtchn(dev->evtchn); + + /* Special conversion specifier 'hh' needed for __ia64__. Without + this mini-os panics with 'Unaligned reference'. 
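 *
 * The 'hh' modifier makes sscanf() store each octet through an unsigned
 * char pointer rather than an int pointer, so only one byte of rawmac[] is
 * written per conversion. Sketch of the parse (address illustrative):
 *
 *   unsigned char mac[6];
 *   sscanf("00:16:3e:12:34:56", "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
 *          &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
 *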
*/ + if (rawmac) + sscanf(dev->mac,"%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &rawmac[0], + &rawmac[1], + &rawmac[2], + &rawmac[3], + &rawmac[4], + &rawmac[5]); + + return dev; +error: + free(msg); + free(err); + free_netfront(dev); + return NULL; +} + +#ifdef HAVE_LIBC +int netfront_tap_open(char *nodename) { + struct netfront_dev *dev; + + dev = init_netfront(nodename, NETIF_SELECT_RX, NULL, NULL); + if (!dev) { + printk("TAP open failed\n"); + errno = EIO; + return -1; + } + dev->fd = alloc_fd(FTYPE_TAP); + printk("tap_open(%s) -> %d\n", nodename, dev->fd); + files[dev->fd].tap.dev = dev; + return dev->fd; +} +#endif + +void shutdown_netfront(struct netfront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/request-rx-copy") + 1]; + + printk("close network: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_netfront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/tx-ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/rx-ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/request-rx-copy", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_netfront(dev); +} + + +void init_rx_buffers(struct netfront_dev *dev) +{ + int i, requeue_idx; + netif_rx_request_t *req; + int notify; + + /* Rebuild the RX buffer freelist and the RX ring itself. 
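 *
 * Each slot repeats the grant life cycle used by network_rx() above: grant
 * the backend access to the page, publish the request, and end the grant
 * once the response for that id has been consumed:
 *
 *   buf->gref = req->gref = gnttab_grant_access(dev->dom, virt_to_mfn(buf->page), 0);
 *   // ... backend fills the page; then, on the response path ...
 *   gnttab_end_access(buf->gref);
 *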
*/ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) + { + struct net_buffer* buf = &dev->rx_buffers[requeue_idx]; + req = RING_GET_REQUEST(&dev->rx, requeue_idx); + + buf->gref = req->gref = + gnttab_grant_access(dev->dom,virt_to_mfn(buf->page),0); + + req->id = requeue_idx; + + requeue_idx++; + } + + dev->rx.req_prod_pvt = requeue_idx; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->rx, notify); + + if (notify) + notify_remote_via_evtchn(dev->evtchn); + + dev->rx.sring->rsp_event = dev->rx.rsp_cons + 1; +} + + +void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len) +{ + int flags; + struct netif_tx_request *tx; + RING_IDX i; + int notify; + unsigned short id; + struct net_buffer* buf; + void* page; + + BUG_ON(len > PAGE_SIZE); + + down(&dev->tx_sem); + + local_irq_save(flags); + id = get_id_from_freelist(dev->tx_freelist); + local_irq_restore(flags); + + buf = &dev->tx_buffers[id]; + page = buf->page; + if (!page) + page = buf->page = (char*) alloc_page(); + + i = dev->tx.req_prod_pvt; + tx = RING_GET_REQUEST(&dev->tx, i); + + memcpy(page,data,len); + + buf->gref = + tx->gref = gnttab_grant_access(dev->dom,virt_to_mfn(page),1); + + tx->offset=0; + tx->size = len; + tx->flags=0; + tx->id = id; + dev->tx.req_prod_pvt = i + 1; + + wmb(); + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->tx, notify); + + if(notify) notify_remote_via_evtchn(dev->evtchn); + + local_irq_save(flags); + network_tx_buf_gc(dev); + local_irq_restore(flags); +} + +#ifdef HAVE_LIBC +ssize_t netfront_receive(struct netfront_dev *dev, unsigned char *data, size_t len) +{ + unsigned long flags; + int fd = dev->fd; + ASSERT(current == main_thread); + + dev->rlen = 0; + dev->data = data; + dev->len = len; + + local_irq_save(flags); + network_rx(dev); + if (!dev->rlen && fd != -1) + /* No data for us, make select stop returning */ + files[fd].read = 0; + /* Before re-enabling the interrupts, in case a packet just arrived in the + * meanwhile. */ + local_irq_restore(flags); + + dev->data = NULL; + dev->len = 0; + + return dev->rlen; +} +#endif diff -Nru xen-4.6.0/extras/mini-os/pcifront.c xen-4.6.5/extras/mini-os/pcifront.c --- xen-4.6.0/extras/mini-os/pcifront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/pcifront.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,616 @@ +/* Minimal PCI driver for Mini-OS. + * Copyright (c) 2007-2008 Samuel Thibault. + * Based on blkfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +DECLARE_WAIT_QUEUE_HEAD(pcifront_queue); +static struct pcifront_dev *pcidev; + +struct pcifront_dev { + domid_t dom; + + struct xen_pci_sharedinfo *info; + grant_ref_t info_ref; + evtchn_port_t evtchn; + + char *nodename; + char *backend; + + xenbus_event_queue events; +}; + +void pcifront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + wake_up(&pcifront_queue); +} + +static void free_pcifront(struct pcifront_dev *dev) +{ + if (!dev) + dev = pcidev; + + mask_evtchn(dev->evtchn); + + gnttab_end_access(dev->info_ref); + free_page(dev->info); + + unbind_evtchn(dev->evtchn); + + free(dev->backend); + free(dev->nodename); + free(dev); +} + +void pcifront_watches(void *opaque) +{ + XenbusState state; + char *err = NULL, *msg = NULL; + char *be_path, *be_state; + char* nodename = opaque ? 
opaque : "device/pci/0"; + char path[strlen(nodename) + 9]; + char fe_state[strlen(nodename) + 7]; + xenbus_event_queue events = NULL; + + snprintf(path, sizeof(path), "%s/backend", nodename); + snprintf(fe_state, sizeof(fe_state), "%s/state", nodename); + + while (1) { + printk("pcifront_watches: waiting for backend path to appear %s\n", path); + xenbus_watch_path_token(XBT_NIL, path, path, &events); + while ((err = xenbus_read(XBT_NIL, path, &be_path)) != NULL) { + free(err); + xenbus_wait_for_watch(&events); + } + xenbus_unwatch_path_token(XBT_NIL, path, path); + printk("pcifront_watches: waiting for backend to get into the right state %s\n", be_path); + be_state = (char *) malloc(strlen(be_path) + 7); + snprintf(be_state, strlen(be_path) + 7, "%s/state", be_path); + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); + while ((err = xenbus_read(XBT_NIL, be_state, &msg)) != NULL || msg[0] > '4') { + free(msg); + free(err); + xenbus_wait_for_watch(&events); + } + xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); + if (init_pcifront(NULL) == NULL) { + free(be_state); + free(be_path); + continue; + } + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); + state = XenbusStateConnected; + printk("pcifront_watches: waiting for backend events %s\n", be_state); + while ((err = xenbus_wait_for_state_change(be_state, &state, &events)) == NULL && + (err = xenbus_read(XBT_NIL, pcidev->backend, &msg)) == NULL) { + free(msg); + printk("pcifront_watches: backend state changed: %s %d\n", be_state, state); + if (state == XenbusStateReconfiguring) { + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateReconfiguring); + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateReconfiguring)) != NULL) { + printk("pcifront_watches: error changing state to %d: %s\n", + XenbusStateReconfiguring, err); + if (!strcmp(err, "ENOENT")) { + xenbus_write(XBT_NIL, fe_state, "7"); + free(err); + } + } + } else if (state == XenbusStateReconfigured) { + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateConnected); + printk("pcifront_watches: changing state to %d\n", XenbusStateConnected); + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateConnected)) != NULL) { + printk("pcifront_watches: error changing state to %d: %s\n", + XenbusStateConnected, err); + if (!strcmp(err, "ENOENT")) { + xenbus_write(XBT_NIL, fe_state, "4"); + free(err); + } + } + } else if (state == XenbusStateClosing) + break; + } + if (err) { + printk("pcifront_watches: done waiting err=%s\n", err); + free(err); + } else + printk("pcifront_watches: done waiting\n"); + err = xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); + shutdown_pcifront(pcidev); + free(be_state); + free(be_path); + free(err); + pcidev = NULL; + } + + xenbus_unwatch_path_token(XBT_NIL, path, path); +} + +struct pcifront_dev *init_pcifront(char *_nodename) +{ + xenbus_transaction_t xbt; + char* err; + char* message=NULL; + int retry=0; + char* msg = NULL; + char* nodename = _nodename ? 
_nodename : "device/pci/0"; + int dom; + + struct pcifront_dev *dev; + + char path[strlen(nodename) + strlen("/backend-id") + 1]; + + if (!_nodename && pcidev) + return pcidev; + + printk("******************* PCIFRONT for %s **********\n\n\n", nodename); + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dom = xenbus_read_integer(path); + if (dom == -1) { + printk("no backend\n"); + return NULL; + } + + dev = malloc(sizeof(*dev)); + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); + dev->dom = dom; + + evtchn_alloc_unbound(dev->dom, pcifront_handler, dev, &dev->evtchn); + + dev->info = (struct xen_pci_sharedinfo*) alloc_page(); + memset(dev->info,0,PAGE_SIZE); + + dev->info_ref = gnttab_grant_access(dev->dom,virt_to_mfn(dev->info),0); + + dev->events = NULL; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "pci-op-ref","%u", + dev->info_ref); + if (err) { + message = "writing pci-op-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "magic", XEN_PCI_MAGIC); + if (err) { + message = "writing magic"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateInitialised); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + printk("completing transaction\n"); + goto again; + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printk("Abort transaction %s\n", message); + goto error; + +done: + + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printk("Error %s when reading the backend path %s\n", msg, path); + goto error; + } + + printk("backend at %s\n", dev->backend); + + { + char path[strlen(dev->backend) + strlen("/state") + 1]; + char frontpath[strlen(nodename) + strlen("/state") + 1]; + XenbusState state; + snprintf(path, sizeof(path), "%s/state", dev->backend); + + xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); + + err = NULL; + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateConnected) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + if (state != XenbusStateConnected) { + printk("backend not available, state=%d\n", state); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + + snprintf(frontpath, sizeof(frontpath), "%s/state", nodename); + if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) + != NULL) { + printk("error switching state %s\n", err); + free(err); + err = xenbus_unwatch_path_token(XBT_NIL, path, path); + goto error; + } + } + unmask_evtchn(dev->evtchn); + + printk("**************************\n"); + + if (!_nodename) + pcidev = dev; + + return dev; + +error: + free(msg); + free(err); + free_pcifront(dev); + return NULL; +} + +void pcifront_scan(struct pcifront_dev *dev, void (*func)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun)) +{ + char *path; + int i, n, len; + char *s, *msg = NULL; + unsigned int domain, bus, slot, fun; + + if (!dev) + dev = pcidev; + if (!dev) { + printk("pcifront_scan: device or bus\n"); + return; + } + + 
len = strlen(dev->backend) + 1 + 5 + 10 + 1; + path = (char *) malloc(len); + snprintf(path, len, "%s/num_devs", dev->backend); + n = xenbus_read_integer(path); + + for (i = 0; i < n; i++) { + snprintf(path, len, "%s/dev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + free(msg); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", &domain, &bus, &slot, &fun) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + if (func) + func(domain, bus, slot, fun); + } + free(path); +} + +void shutdown_pcifront(struct pcifront_dev *dev) +{ + char* err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1]; + + printk("close pci: backend at %s\n",dev->backend); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateClosing, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (err == NULL && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateClosed, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state, &dev->events); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { + printk("shutdown_pcifront: error changing state to %d: %s\n", + XenbusStateInitialising, err); + goto close_pcifront; + } + state = xenbus_read_integer(path); + while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state, &dev->events); + +close_pcifront: + free(err); + err2 = xenbus_unwatch_path_token(XBT_NIL, path, path); + free(err2); + + snprintf(nodename, sizeof(nodename), "%s/info-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_pcifront(dev); +} + +int pcifront_physical_to_virtual (struct pcifront_dev *dev, + unsigned int *dom, + unsigned int *bus, + unsigned int *slot, + unsigned int *fun) +{ + /* FIXME: the buffer sizing is a little lazy here. 
10 extra bytes + should be enough to hold the paths we need to construct, even + if the number of devices is large */ + char path[strlen(dev->backend) + strlen("/num_devs") + 10 + 1]; + int i, n; + char *s, *msg = NULL; + unsigned int dom1, bus1, slot1, fun1; + + if (!dev) + dev = pcidev; + + snprintf(path, sizeof(path), "%s/num_devs", dev->backend); + n = xenbus_read_integer(path); + + for (i = 0; i < n; i++) { + snprintf(path, sizeof(path), "%s/dev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + free(msg); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", &dom1, &bus1, &slot1, &fun1) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + if (dom1 == *dom && bus1 == *bus && slot1 == *slot && fun1 == *fun) { + snprintf(path, sizeof(path), "%s/vdev-%d", dev->backend, i); + msg = xenbus_read(XBT_NIL, path, &s); + if (msg) { + printk("Error %s when reading the PCI root name at %s\n", msg, path); + continue; + } + + if (sscanf(s, "%x:%x:%x.%x", dom, bus, slot, fun) != 4) { + printk("\"%s\" does not look like a PCI device address\n", s); + free(s); + continue; + } + free(s); + + return 0; + } + } + return -1; +} + +void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op) +{ + if (!dev) + dev = pcidev; + dev->info->op = *op; + /* Make sure info is written before the flag */ + wmb(); + set_bit(_XEN_PCIF_active, (void*) &dev->info->flags); + notify_remote_via_evtchn(dev->evtchn); + + wait_event(pcifront_queue, !test_bit(_XEN_PCIF_active, (void*) &dev->info->flags)); + + /* Make sure flag is read before info */ + rmb(); + *op = dev->info->op; +} + +int pcifront_conf_read(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int *val) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_conf_read; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.offset = off; + op.size = size; + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + + *val = op.value; + + return 0; +} + +int pcifront_conf_write(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + unsigned int off, unsigned int size, unsigned int val) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_conf_write; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.offset = off; + op.size = size; + + op.value = val; + + pcifront_op(dev, &op); + + return op.err; +} + +int pcifront_enable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_enable_msi; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + else + return op.value; +} + +int pcifront_disable_msi(struct pcifront_dev *dev, + unsigned int dom, + unsigned int 
bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_disable_msi; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + return op.err; +} + +int pcifront_enable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun, + struct xen_msix_entry *entries, int n) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + if (n > SH_INFO_MAX_VEC) + return XEN_PCI_ERR_op_failed; + + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_enable_msix; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + op.value = n; + + memcpy(op.msix_entries, entries, n * sizeof(*entries)); + + pcifront_op(dev, &op); + + if (op.err) + return op.err; + + memcpy(entries, op.msix_entries, n * sizeof(*entries)); + + return 0; +} + + +int pcifront_disable_msix(struct pcifront_dev *dev, + unsigned int dom, + unsigned int bus, unsigned int slot, unsigned int fun) +{ + struct xen_pci_op op; + + if (!dev) + dev = pcidev; + if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) + return XEN_PCI_ERR_dev_not_found; + memset(&op, 0, sizeof(op)); + + op.cmd = XEN_PCI_OP_disable_msix; + op.domain = dom; + op.bus = bus; + op.devfn = PCI_DEVFN(slot, fun); + + pcifront_op(dev, &op); + + return op.err; +} diff -Nru xen-4.6.0/extras/mini-os/README xen-4.6.5/extras/mini-os/README --- xen-4.6.0/extras/mini-os/README 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/README 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,46 @@ + Minimal OS + ---------- + +This shows some of the stuff that any guest OS will have to set up. + +This includes: + + * installing a virtual exception table + * handling virtual exceptions + * handling asynchronous events + * enabling/disabling async events + * parsing start_info struct at start-of-day + * registering virtual interrupt handlers (for timer interrupts) + * a simple page and memory allocator + * minimal libc support + * minimal Copy-on-Write support + * network, block, framebuffer support + * transparent access to FileSystem exports (see tools/fs-back) + +- to build it, just type make. + +- to build it with TCP/IP support, download LWIP 1.3.2 source code and type + + make LWIPDIR=/path/to/lwip/source + +- to build it with much better libc support, see the stubdom/ directory + +- to start it, do the following in domain0 + # xl create -c domain_config + +This starts the kernel, prints some diagnostic output, and then prints the +system time once every second. + +If you have set up a disk in the config file (e.g. +disk = [ 'file:/tmp/foo,hda,r' ] ), it will loop reading it. If that disk is +writable (e.g. disk = [ 'file:/tmp/foo,hda,w' ] ), it will write data patterns +and re-read them. + +If you have set up a network in the config file (e.g. vif = [''] ), it will +print incoming packets. + +If you have set up a VFB in the config file (e.g. vfb = ['type=sdl'] ), it will +show a mouse cursor with which you can draw color squares. + +If you have compiled it with TCP/IP support, it will run a daytime server on +TCP port 13. 
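[Editor's note: the disk/vif/vfb fragments quoted in the README above combine into a complete guest config along these lines. This is a hypothetical domain_config for illustration; the kernel path, name and memory size are not taken from the patch.]

    kernel = "mini-os.gz"
    name   = "mini-os"
    memory = 32
    disk   = [ 'file:/tmp/foo,hda,w' ]
    vif    = [ '' ]
    vfb    = [ 'type=sdl' ]

It would be started with "xl create -c domain_config", as described above.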
diff -Nru xen-4.6.0/extras/mini-os/sched.c xen-4.6.5/extras/mini-os/sched.c --- xen-4.6.0/extras/mini-os/sched.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/sched.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,304 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: sched.c + * Author: Grzegorz Milos + * Changes: Robert Kaiser + * + * Date: Aug 2005 + * + * Environment: Xen Minimal OS + * Description: simple scheduler for Mini-Os + * + * The scheduler is non-preemptive (cooperative), and schedules according + * to Round Robin algorithm. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef SCHED_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=sched.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + +MINIOS_TAILQ_HEAD(thread_list, struct thread); + +struct thread *idle_thread = NULL; +static struct thread_list exited_threads = MINIOS_TAILQ_HEAD_INITIALIZER(exited_threads); +static struct thread_list thread_list = MINIOS_TAILQ_HEAD_INITIALIZER(thread_list); +static int threads_started; + +struct thread *main_thread; + +void inline print_runqueue(void) +{ + struct thread *th; + MINIOS_TAILQ_FOREACH(th, &thread_list, thread_list) + { + printk(" Thread \"%s\", runnable=%d\n", th->name, is_runnable(th)); + } + printk("\n"); +} + +void schedule(void) +{ + struct thread *prev, *next, *thread, *tmp; + unsigned long flags; + + if (irqs_disabled()) { + printk("Must not call schedule() with IRQs disabled\n"); + BUG(); + } + + prev = current; + local_irq_save(flags); + + if (in_callback) { + printk("Must not call schedule() from a callback\n"); + BUG(); + } + + do { + /* Examine all threads. + Find a runnable thread, but also wake up expired ones and find the + time when the next timeout expires, else use 10 seconds. 
*/ + s_time_t now = NOW(); + s_time_t min_wakeup_time = now + SECONDS(10); + next = NULL; + MINIOS_TAILQ_FOREACH_SAFE(thread, &thread_list, thread_list, tmp) + { + if (!is_runnable(thread) && thread->wakeup_time != 0LL) + { + if (thread->wakeup_time <= now) + wake(thread); + else if (thread->wakeup_time < min_wakeup_time) + min_wakeup_time = thread->wakeup_time; + } + if(is_runnable(thread)) + { + next = thread; + /* Put this thread on the end of the list */ + MINIOS_TAILQ_REMOVE(&thread_list, thread, thread_list); + MINIOS_TAILQ_INSERT_TAIL(&thread_list, thread, thread_list); + break; + } + } + if (next) + break; + /* block until the next timeout expires, or for 10 secs, whichever comes first */ + block_domain(min_wakeup_time); + /* handle pending events if any */ + force_evtchn_callback(); + } while(1); + local_irq_restore(flags); + /* Interrupting the switch is equivalent to having the next thread + interrupted at the return instruction, and therefore at a safe point. */ + if(prev != next) switch_threads(prev, next); + + MINIOS_TAILQ_FOREACH_SAFE(thread, &exited_threads, thread_list, tmp) + { + if(thread != prev) + { + MINIOS_TAILQ_REMOVE(&exited_threads, thread, thread_list); + free_pages(thread->stack, STACK_SIZE_PAGE_ORDER); + xfree(thread); + } + } +} + +struct thread* create_thread(char *name, void (*function)(void *), void *data) +{ + struct thread *thread; + unsigned long flags; + /* Call architecture specific setup. */ + thread = arch_create_thread(name, function, data); + /* Not runnable, not exited, not sleeping */ + thread->flags = 0; + thread->wakeup_time = 0LL; +#ifdef HAVE_LIBC + _REENT_INIT_PTR((&thread->reent)) +#endif + set_runnable(thread); + local_irq_save(flags); + MINIOS_TAILQ_INSERT_TAIL(&thread_list, thread, thread_list); + local_irq_restore(flags); + return thread; +} + +#ifdef HAVE_LIBC +static struct _reent callback_reent; +struct _reent *__getreent(void) +{ + struct _reent *_reent; + + if (!threads_started) + _reent = _impure_ptr; + else if (in_callback) + _reent = &callback_reent; + else + _reent = &get_current()->reent; + +#ifndef NDEBUG +#if defined(__x86_64__) || defined(__x86__) + { +#ifdef __x86_64__ + register unsigned long sp asm ("rsp"); +#else + register unsigned long sp asm ("esp"); +#endif + if ((sp & (STACK_SIZE-1)) < STACK_SIZE / 16) { + static int overflowing; + if (!overflowing) { + overflowing = 1; + printk("stack overflow\n"); + BUG(); + } + } + } +#endif +#else +#error Not implemented yet +#endif + return _reent; +} +#endif + +void exit_thread(void) +{ + unsigned long flags; + struct thread *thread = current; + printk("Thread \"%s\" exited.\n", thread->name); + local_irq_save(flags); + /* Remove from the thread list */ + MINIOS_TAILQ_REMOVE(&thread_list, thread, thread_list); + clear_runnable(thread); + /* Put onto exited list */ + MINIOS_TAILQ_INSERT_HEAD(&exited_threads, thread, thread_list); + local_irq_restore(flags); + /* Schedule will free the resources */ + while(1) + { + schedule(); + printk("schedule() returned! 
Trying again\n"); + } +} + +void block(struct thread *thread) +{ + thread->wakeup_time = 0LL; + clear_runnable(thread); +} + +void msleep(uint32_t millisecs) +{ + struct thread *thread = get_current(); + thread->wakeup_time = NOW() + MILLISECS(millisecs); + clear_runnable(thread); + schedule(); +} + +void wake(struct thread *thread) +{ + thread->wakeup_time = 0LL; + set_runnable(thread); +} + +void idle_thread_fn(void *unused) +{ + threads_started = 1; + while (1) { + block(current); + schedule(); + } +} + +DECLARE_MUTEX(mutex); + +void th_f1(void *data) +{ + struct timeval tv1, tv2; + + for(;;) + { + down(&mutex); + printk("Thread \"%s\" got semaphore, runnable %d\n", current->name, is_runnable(current)); + schedule(); + printk("Thread \"%s\" releases the semaphore\n", current->name); + up(&mutex); + + + gettimeofday(&tv1, NULL); + for(;;) + { + gettimeofday(&tv2, NULL); + if(tv2.tv_sec - tv1.tv_sec > 2) break; + } + + + schedule(); + } +} + +void th_f2(void *data) +{ + for(;;) + { + printk("Thread OTHER executing, data 0x%p\n", data); + schedule(); + } +} + + + +void init_sched(void) +{ + printk("Initialising scheduler\n"); + +#ifdef HAVE_LIBC + _REENT_INIT_PTR((&callback_reent)) +#endif + idle_thread = create_thread("Idle", idle_thread_fn, NULL); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Nru xen-4.6.0/extras/mini-os/test.c xen-4.6.5/extras/mini-os/test.c --- xen-4.6.0/extras/mini-os/test.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/test.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,579 @@ +/****************************************************************************** + * test.c + * + * Test code for all the various frontends; split from kernel.c + * + * Copyright (c) 2002-2003, K A Fraser & R Neugebauer + * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * Copyright (c) 2006, Robert Kaiser, FH Wiesbaden + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_XENBUS +static unsigned int do_shutdown = 0; +static unsigned int shutdown_reason; +static DECLARE_WAIT_QUEUE_HEAD(shutdown_queue); +#endif + +#ifdef CONFIG_XENBUS +void test_xenbus(void); + +static void xenbus_tester(void *p) +{ + test_xenbus(); +} +#endif + +#ifndef HAVE_LIBC +/* Should be random enough for our uses */ +int rand(void) +{ + static unsigned int previous; + struct timeval tv; + gettimeofday(&tv, NULL); + previous += tv.tv_sec + tv.tv_usec; + previous *= RAND_MIX; + return previous; +} +#endif + +static void periodic_thread(void *p) +{ + struct timeval tv; + printk("Periodic thread started.\n"); + for(;;) + { + gettimeofday(&tv, NULL); + printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); + msleep(1000); + } +} + +#ifdef CONFIG_NETFRONT +static struct netfront_dev *net_dev; +static struct semaphore net_sem = __SEMAPHORE_INITIALIZER(net_sem, 0); + +static void netfront_thread(void *p) +{ + net_dev = init_netfront(NULL, NULL, NULL, NULL); + up(&net_sem); +} +#endif + +#ifdef CONFIG_BLKFRONT +static struct blkfront_dev *blk_dev; +static struct blkfront_info blk_info; +static uint64_t blk_size_read; +static uint64_t blk_size_write; +static struct semaphore blk_sem = __SEMAPHORE_INITIALIZER(blk_sem, 0);; + +struct blk_req { + struct blkfront_aiocb aiocb; + int rand_value; + struct blk_req *next; +}; + +#ifdef BLKTEST_WRITE +static struct blk_req *blk_to_read; +#endif + +static struct blk_req *blk_alloc_req(uint64_t sector) +{ + struct blk_req *req = xmalloc(struct blk_req); + req->aiocb.aio_dev = blk_dev; + req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size); + req->aiocb.aio_nbytes = blk_info.sector_size; + req->aiocb.aio_offset = sector * blk_info.sector_size; + req->aiocb.data = req; + req->next = NULL; + return req; +} + +static void blk_read_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + if (ret) + printk("got error code %d when reading at offset %ld\n", ret, (long) aiocb->aio_offset); + else + blk_size_read += blk_info.sector_size; + free(aiocb->aio_buf); + free(req); +} + +static void blk_read_sector(uint64_t sector) +{ + struct blk_req *req; + + req = blk_alloc_req(sector); + req->aiocb.aio_cb = blk_read_completed; + + blkfront_aio_read(&req->aiocb); +} + +#ifdef BLKTEST_WRITE +static void blk_write_read_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + int rand_value; + int i; + int *buf; + + if (ret) { + printk("got error code %d when reading back at offset %ld\n", ret, aiocb->aio_offset); + free(aiocb->aio_buf); + free(req); + return; + } + blk_size_read += blk_info.sector_size; + buf = (int*) aiocb->aio_buf; + rand_value = req->rand_value; + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { + if (buf[i] != rand_value) { + printk("bogus data at offset %ld\n", aiocb->aio_offset + i); + break; + } + rand_value *= RAND_MIX; + } + free(aiocb->aio_buf); + free(req); +} + +static void blk_write_completed(struct blkfront_aiocb *aiocb, int ret) +{ + struct blk_req *req = aiocb->data; + if (ret) { + printk("got error code %d when writing at offset %ld\n", ret, aiocb->aio_offset); + free(aiocb->aio_buf); + free(req); + return; + } + blk_size_write += blk_info.sector_size; + /* Push write check */ + req->next = blk_to_read; + blk_to_read = req; +} + +static void 
blk_write_sector(uint64_t sector) +{ + struct blk_req *req; + int rand_value; + int i; + int *buf; + + req = blk_alloc_req(sector); + req->aiocb.aio_cb = blk_write_completed; + req->rand_value = rand_value = rand(); + + buf = (int*) req->aiocb.aio_buf; + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { + buf[i] = rand_value; + rand_value *= RAND_MIX; + } + + blkfront_aio_write(&req->aiocb); +} +#endif + +static void blkfront_thread(void *p) +{ + time_t lasttime = 0; + + blk_dev = init_blkfront(NULL, &blk_info); + if (!blk_dev) { + up(&blk_sem); + return; + } + + if (blk_info.info & VDISK_CDROM) + printk("Block device is a CDROM\n"); + if (blk_info.info & VDISK_REMOVABLE) + printk("Block device is removable\n"); + if (blk_info.info & VDISK_READONLY) + printk("Block device is read-only\n"); + +#ifdef BLKTEST_WRITE + if (blk_info.mode == O_RDWR) { + blk_write_sector(0); + blk_write_sector(blk_info.sectors-1); + } else +#endif + { + blk_read_sector(0); + blk_read_sector(blk_info.sectors-1); + } + + while (!do_shutdown) { + uint64_t sector = rand() % blk_info.sectors; + struct timeval tv; +#ifdef BLKTEST_WRITE + if (blk_info.mode == O_RDWR) + blk_write_sector(sector); + else +#endif + blk_read_sector(sector); + blkfront_aio_poll(blk_dev); + gettimeofday(&tv, NULL); + if (tv.tv_sec > lasttime + 10) { + printk("%llu read, %llu write\n", + (unsigned long long) blk_size_read, + (unsigned long long) blk_size_write); + lasttime = tv.tv_sec; + } + +#ifdef BLKTEST_WRITE + while (blk_to_read) { + struct blk_req *req = blk_to_read; + blk_to_read = blk_to_read->next; + req->aiocb.aio_cb = blk_write_read_completed; + blkfront_aio_read(&req->aiocb); + } +#endif + } + up(&blk_sem); +} +#endif + +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) +#define WIDTH 800 +#define HEIGHT 600 +#define DEPTH 32 + +static uint32_t *fb; +static int refresh_period = 50; +static struct fbfront_dev *fb_dev; +static struct semaphore fbfront_sem = __SEMAPHORE_INITIALIZER(fbfront_sem, 0); + +static void fbfront_drawvert(int x, int y1, int y2, uint32_t color) +{ + int y; + if (x < 0) + return; + if (x >= WIDTH) + return; + if (y1 < 0) + y1 = 0; + if (y2 >= HEIGHT) + y2 = HEIGHT-1; + for (y = y1; y <= y2; y++) + fb[x + y*WIDTH] ^= color; +} + +static void fbfront_drawhoriz(int x1, int x2, int y, uint32_t color) +{ + int x; + if (y < 0) + return; + if (y >= HEIGHT) + return; + if (x1 < 0) + x1 = 0; + if (x2 >= WIDTH) + x2 = WIDTH-1; + for (x = x1; x <= x2; x++) + fb[x + y*WIDTH] ^= color; +} + +static void fbfront_thread(void *p) +{ + size_t line_length = WIDTH * (DEPTH / 8); + size_t memsize = HEIGHT * line_length; + unsigned long *mfns; + int i, n = (memsize + PAGE_SIZE-1) / PAGE_SIZE; + + memsize = n * PAGE_SIZE; + fb = _xmalloc(memsize, PAGE_SIZE); + memset(fb, 0, memsize); + mfns = xmalloc_array(unsigned long, n); + for (i = 0; i < n; i++) + mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE); + fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n); + xfree(mfns); + if (!fb_dev) { + xfree(fb); + } + up(&fbfront_sem); +} + +static void clip_cursor(int *x, int *y) +{ + if (*x < 0) + *x = 0; + if (*x >= WIDTH) + *x = WIDTH - 1; + if (*y < 0) + *y = 0; + if (*y >= HEIGHT) + *y = HEIGHT - 1; +} + +static void refresh_cursor(int new_x, int new_y) +{ + static int old_x = -1, old_y = -1; + + if (!refresh_period) + return; + + if (old_x != -1 && old_y != -1) { + fbfront_drawvert(old_x, old_y + 1, old_y + 8, 0xffffffff); + fbfront_drawhoriz(old_x + 1, old_x + 8, old_y, 0xffffffff); + 
fbfront_update(fb_dev, old_x, old_y, 9, 9); + } + old_x = new_x; + old_y = new_y; + fbfront_drawvert(new_x, new_y + 1, new_y + 8, 0xffffffff); + fbfront_drawhoriz(new_x + 1, new_x + 8, new_y, 0xffffffff); + fbfront_update(fb_dev, new_x, new_y, 9, 9); +} + +static struct kbdfront_dev *kbd_dev; +static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0); +static void kbdfront_thread(void *p) +{ + DEFINE_WAIT(w); + DEFINE_WAIT(w2); + DEFINE_WAIT(w3); + int x = WIDTH / 2, y = HEIGHT / 2, z = 0; + + kbd_dev = init_kbdfront(NULL, 1); + down(&fbfront_sem); + if (!kbd_dev) { + up(&kbd_sem); + return; + } + + refresh_cursor(x, y); + while (1) { + union xenkbd_in_event kbdevent; + union xenfb_in_event fbevent; + int sleep = 1; + + add_waiter(w, kbdfront_queue); + add_waiter(w2, fbfront_queue); + add_waiter(w3, shutdown_queue); + + rmb(); + if (do_shutdown) + break; + + while (kbdfront_receive(kbd_dev, &kbdevent, 1) != 0) { + sleep = 0; + switch(kbdevent.type) { + case XENKBD_TYPE_MOTION: + printk("motion x:%d y:%d z:%d\n", + kbdevent.motion.rel_x, + kbdevent.motion.rel_y, + kbdevent.motion.rel_z); + x += kbdevent.motion.rel_x; + y += kbdevent.motion.rel_y; + z += kbdevent.motion.rel_z; + clip_cursor(&x, &y); + refresh_cursor(x, y); + break; + case XENKBD_TYPE_POS: + printk("pos x:%d y:%d dz:%d\n", + kbdevent.pos.abs_x, + kbdevent.pos.abs_y, + kbdevent.pos.rel_z); + x = kbdevent.pos.abs_x; + y = kbdevent.pos.abs_y; + z = kbdevent.pos.rel_z; + clip_cursor(&x, &y); + refresh_cursor(x, y); + break; + case XENKBD_TYPE_KEY: + printk("key %d %s\n", + kbdevent.key.keycode, + kbdevent.key.pressed ? "pressed" : "released"); + if (kbdevent.key.keycode == BTN_LEFT) { + printk("mouse %s at (%d,%d,%d)\n", + kbdevent.key.pressed ? "click" : "release", x, y, z); + if (kbdevent.key.pressed) { + uint32_t color = rand(); + fbfront_drawvert(x - 16, y - 16, y + 15, color); + fbfront_drawhoriz(x - 16, x + 15, y + 16, color); + fbfront_drawvert(x + 16, y - 15, y + 16, color); + fbfront_drawhoriz(x - 15, x + 16, y - 16, color); + fbfront_update(fb_dev, x - 16, y - 16, 33, 33); + } + } else if (kbdevent.key.keycode == KEY_Q) { + shutdown_reason = SHUTDOWN_poweroff; + wmb(); + do_shutdown = 1; + wmb(); + wake_up(&shutdown_queue); + } + break; + } + } + while (fbfront_receive(fb_dev, &fbevent, 1) != 0) { + sleep = 0; + switch(fbevent.type) { + case XENFB_TYPE_REFRESH_PERIOD: + refresh_period = fbevent.refresh_period.period; + printk("refresh period %d\n", refresh_period); + refresh_cursor(x, y); + break; + } + } + if (sleep) + schedule(); + remove_waiter(w3, shutdown_queue); + remove_waiter(w2, fbfront_queue); + remove_waiter(w, kbdfront_queue); + } + up(&kbd_sem); +} +#endif + +#ifdef CONFIG_PCIFRONT +static struct pcifront_dev *pci_dev; +static struct semaphore pci_sem = __SEMAPHORE_INITIALIZER(pci_sem, 0); + +static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun) +{ + unsigned int vendor, device, rev, class; + + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev); + pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class); + + printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev); +}
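[Editor's note: print_pcidev() above reads the standard PCI configuration header fields at their fixed offsets (vendor ID at 0x00, device ID at 0x02, revision at 0x08, class at 0x0a). A minimal sketch of probing a single field through the same pcifront_conf_read() API; the device address 0000:00:03.0 and the Realtek vendor ID are illustrative, not taken from the patch.]

    /* Sketch: check whether 0000:00:03.0 is a Realtek device. */
    static int is_realtek(struct pcifront_dev *dev)
    {
        unsigned int vendor;

        /* Vendor ID: offset 0x00, width 2 bytes. */
        if (pcifront_conf_read(dev, 0, 0, 3, 0, 0x00, 2, &vendor))
            return 0; /* config-space read failed */
        return vendor == 0x10ec; /* 0x10ec = Realtek */
    }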
printk("PCI devices:\n"); + pcifront_scan(pci_dev, print_pcidev); + up(&pci_sem); +} +#endif + +void shutdown_frontends(void) +{ +#ifdef CONFIG_NETFRONT + down(&net_sem); + if (net_dev) + shutdown_netfront(net_dev); +#endif + +#ifdef CONFIG_BLKFRONT + down(&blk_sem); + if (blk_dev) + shutdown_blkfront(blk_dev); +#endif + +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) + if (fb_dev) + shutdown_fbfront(fb_dev); + + down(&kbd_sem); + if (kbd_dev) + shutdown_kbdfront(kbd_dev); +#endif + +#ifdef CONFIG_PCIFRONT + down(&pci_sem); + if (pci_dev) + shutdown_pcifront(pci_dev); +#endif +} + +#ifdef CONFIG_XENBUS +void app_shutdown(unsigned reason) +{ + shutdown_reason = reason; + wmb(); + do_shutdown = 1; + wmb(); + wake_up(&shutdown_queue); +} + +static void shutdown_thread(void *p) +{ + DEFINE_WAIT(w); + + while (1) { + add_waiter(w, shutdown_queue); + rmb(); + if (do_shutdown) { + rmb(); + break; + } + schedule(); + remove_waiter(w, shutdown_queue); + } + + shutdown_frontends(); + + HYPERVISOR_shutdown(shutdown_reason); +} +#endif + +int app_main(start_info_t *si) +{ + printk("Test main: start_info=%p\n", si); +#ifdef CONFIG_XENBUS + create_thread("xenbus_tester", xenbus_tester, si); +#endif + create_thread("periodic_thread", periodic_thread, si); +#ifdef CONFIG_NETFRONT + create_thread("netfront", netfront_thread, si); +#endif +#ifdef CONFIG_BLKFRONT + create_thread("blkfront", blkfront_thread, si); +#endif +#if defined(CONFIG_FBFRONT) && defined(CONFIG_KBDFRONT) + create_thread("fbfront", fbfront_thread, si); + create_thread("kbdfront", kbdfront_thread, si); +#endif +#ifdef CONFIG_PCIFRONT + create_thread("pcifront", pcifront_thread, si); +#endif +#ifdef CONFIG_XENBUS + create_thread("shutdown", shutdown_thread, si); +#endif + return 0; +} diff -Nru xen-4.6.0/extras/mini-os/tpmback.c xen-4.6.5/extras/mini-os/tpmback.c --- xen-4.6.0/extras/mini-os/tpmback.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/tpmback.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/xen/tpmback/tpmback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netback/netback.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This code has also been derived from drivers/xen/tpmback/xenbus.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2005 IBM Corporation + * Copyright (C) 2005 Rusty Russell + * + * This code has also been derived from drivers/xen/tpmback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself also derived from drvivers/xen/netback/interface.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef HAVE_LIBC +#define strtoul simple_strtoul +#endif + +//#define TPMBACK_PRINT_DEBUG +#ifdef TPMBACK_PRINT_DEBUG +#define TPMBACK_DEBUG(fmt,...) 
printk("Tpmback:Debug("__FILE__":%d) " fmt, __LINE__, ##__VA_ARGS__) +#define TPMBACK_DEBUG_MORE(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define TPMBACK_DEBUG(fmt,...) +#endif +#define TPMBACK_ERR(fmt,...) printk("Tpmback:Error " fmt, ##__VA_ARGS__) +#define TPMBACK_LOG(fmt,...) printk("Tpmback:Info " fmt, ##__VA_ARGS__) + +#define min(a,b) (((a) < (b)) ? (a) : (b)) + +/* Default size of the tpmif array at initialization */ +#define DEF_ARRAY_SIZE 1 + +/* tpmif and tpmdev flags */ +#define TPMIF_CLOSED 1 +#define TPMIF_REQ_READY 2 + +struct tpmif { + domid_t domid; + unsigned int handle; + + char* fe_path; + char* fe_state_path; + + /* Locally bound event channel*/ + evtchn_port_t evtchn; + + /* Shared page */ + tpmif_shared_page_t *page; + + enum xenbus_state state; + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + + unsigned char uuid[16]; + void* opaque; + + /* state flags */ + int flags; +}; +typedef struct tpmif tpmif_t; + +struct tpmback_dev { + + tpmif_t** tpmlist; + unsigned long num_tpms; + unsigned long num_alloc; + + struct gntmap map; + + /* True if at least one tpmif has a request to be handled */ + int flags; + + xenbus_event_queue events; + + /* Callbacks */ + void (*open_callback)(domid_t, unsigned int); + void (*close_callback)(domid_t, unsigned int); +}; +typedef struct tpmback_dev tpmback_dev_t; + +enum { EV_NONE, EV_NEWFE, EV_STCHNG } tpm_ev_enum; + +/* Global objects */ +static struct thread* eventthread = NULL; +static tpmback_dev_t gtpmdev = { + .tpmlist = NULL, + .num_tpms = 0, + .num_alloc = 0, + .flags = TPMIF_CLOSED, + .events = NULL, + .open_callback = NULL, + .close_callback = NULL, +}; +struct wait_queue_head waitq; +int globalinit = 0; + +/************************************ + * TPMIF SORTED ARRAY FUNCTIONS + * tpmback_dev_t.tpmlist is a sorted array, sorted by domid and then handle number + * Duplicates are not allowed + * **********************************/ + +static void tpmif_req_ready(tpmif_t* tpmif) { + tpmif->flags |= TPMIF_REQ_READY; + gtpmdev.flags |= TPMIF_REQ_READY; +} + +static void tpmdev_check_req(void) { + int i; + int flags; + local_irq_save(flags); + for(i = 0; i < gtpmdev.num_tpms; ++i) { + if(gtpmdev.tpmlist[i]->flags & TPMIF_REQ_READY) { + gtpmdev.flags |= TPMIF_REQ_READY; + local_irq_restore(flags); + return; + } + } + gtpmdev.flags &= ~TPMIF_REQ_READY; + local_irq_restore(flags); +} + +static void tpmif_req_finished(tpmif_t* tpmif) { + tpmif->flags &= ~TPMIF_REQ_READY; + tpmdev_check_req(); +} + +int __get_tpmif_index(int st, int n, domid_t domid, unsigned int handle) +{ + int i = st + n /2; + tpmif_t* tmp; + + if( n <= 0 ) + return -1; + + tmp = gtpmdev.tpmlist[i]; + if(domid == tmp->domid && tmp->handle == handle) { + return i; + } else if ( (domid < tmp->domid) || + (domid == tmp->domid && handle < tmp->handle)) { + return __get_tpmif_index(st, n/2, domid, handle); + } else { + return __get_tpmif_index(i + 1, n/2 - ((n +1) % 2), domid, handle); + } +} + +/* Returns the array index of the tpmif domid/handle. 
Returns -1 if no such tpmif exists */ +int get_tpmif_index(domid_t domid, unsigned int handle) +{ + int flags; + int index; + local_irq_save(flags); + index = __get_tpmif_index(0, gtpmdev.num_tpms, domid, handle); + local_irq_restore(flags); + return index; +} + +/* Returns the tpmif domid/handle or NULL if none exists */ +tpmif_t* get_tpmif(domid_t domid, unsigned int handle) +{ + int flags; + int i; + tpmif_t* ret; + local_irq_save(flags); + i = get_tpmif_index(domid, handle); + if (i < 0) { + ret = NULL; + } else { + ret = gtpmdev.tpmlist[i]; + } + local_irq_restore(flags); + return ret; +} + +/* Remove the given tpmif. Returns 0 if it was removed, -1 if it was not removed */ +int remove_tpmif(tpmif_t* tpmif) +{ + int i, j; + char* err; + int flags; + local_irq_save(flags); + + /* Find the index in the array if it exists */ + i = get_tpmif_index(tpmif->domid, tpmif->handle); + if (i < 0) { + goto error; + } + + /* Remove the interface from the list */ + for(j = i; j < gtpmdev.num_tpms - 1; ++j) { + gtpmdev.tpmlist[j] = gtpmdev.tpmlist[j+1]; + } + gtpmdev.tpmlist[j] = NULL; + --gtpmdev.num_tpms; + + /* If removed tpm was the only ready tpm, then we need to check and turn off the ready flag */ + tpmdev_check_req(); + + local_irq_restore(flags); + + /* Stop listening for events on this tpm interface */ + if((err = xenbus_unwatch_path_token(XBT_NIL, tpmif->fe_state_path, tpmif->fe_state_path))) { + TPMBACK_ERR("Unable to unwatch path token `%s' Error was %s Ignoring..\n", tpmif->fe_state_path, err); + free(err); + } + + return 0; +error: + local_irq_restore(flags); + return -1; +} + +/* Insert tpmif into dev->tpmlist. Returns 0 on success and non-zero on error. + * It is an error to insert a tpmif with the same domid and handle number + * as something already in the list */ +int insert_tpmif(tpmif_t* tpmif) +{ + int flags; + unsigned int i, j; + tpmif_t* tmp; + char* err; + char path[512]; + + local_irq_save(flags); + + /*Check if we need to allocate more space */ + if (gtpmdev.num_tpms == gtpmdev.num_alloc) { + gtpmdev.num_alloc *= 2; + gtpmdev.tpmlist = realloc(gtpmdev.tpmlist, sizeof(tpmif_t*) * gtpmdev.num_alloc); + } + + /*Find where to put the new interface */ + for(i = 0; i < gtpmdev.num_tpms; ++i) + { + tmp = gtpmdev.tpmlist[i]; + if(tpmif->domid == tmp->domid && tpmif->handle == tmp->handle) { + TPMBACK_ERR("Tried to insert duplicate tpm interface %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + if((tpmif->domid < tmp->domid) || + (tpmif->domid == tmp->domid && tpmif->handle < tmp->handle)) { + break; + } + } + + /*Shift all the tpm pointers past i down one */ + for(j = gtpmdev.num_tpms; j > i; --j) { + gtpmdev.tpmlist[j] = gtpmdev.tpmlist[j-1]; + } + + /*Add the new interface */ + gtpmdev.tpmlist[i] = tpmif; + ++gtpmdev.num_tpms; + + /*Should not be needed, anything inserted with ready flag is probably an error */ + tpmdev_check_req(); + + local_irq_restore(flags); + + snprintf(path, 512, "backend/vtpm/%u/%u/feature-protocol-v2", (unsigned int) tpmif->domid, tpmif->handle); + if ((err = xenbus_write(XBT_NIL, path, "1"))) + { + /* if we got an error here we should carefully remove the interface and then return */ + TPMBACK_ERR("Unable to write feature-protocol-v2 node: %s\n", err); + free(err); + remove_tpmif(tpmif); + goto error_post_irq; + } + + /*Listen for state changes on the new interface */ + if((err = xenbus_watch_path_token(XBT_NIL, tpmif->fe_state_path, tpmif->fe_state_path, &gtpmdev.events))) + { + /* if we got an error here we should carefully remove the interface and then return */ + TPMBACK_ERR("Unable to watch path token `%s' Error was %s\n", tpmif->fe_state_path, err); + free(err); + remove_tpmif(tpmif); + goto error_post_irq; + } + return 0; +error: + local_irq_restore(flags); +error_post_irq: + return -1; +}
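[Editor's note: insert_tpmif() and __get_tpmif_index() above depend on one total order over interfaces: by domid first, then by handle, with duplicate keys rejected. A minimal standalone sketch of that ordering as a comparator, for illustration only (not part of the patch):]

    /* Sketch: the (domid, handle) ordering the sorted tpmlist maintains. */
    static int tpmif_cmp(const tpmif_t *a, const tpmif_t *b)
    {
        if (a->domid != b->domid)
            return a->domid < b->domid ? -1 : 1;
        if (a->handle != b->handle)
            return a->handle < b->handle ? -1 : 1;
        return 0; /* equal keys: insert_tpmif() treats this as an error */
    }

__get_tpmif_index() is then just a recursive binary search under this comparator.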
 + + +/***************** + * CHANGE BACKEND STATE + * *****************/ +/*Attempts to change the backend state in xenstore + * returns 0 on success and non-zero on error */ +int tpmif_change_state(tpmif_t* tpmif, enum xenbus_state state) +{ + int tempst; + char path[512]; + char *value; + char *err; + enum xenbus_state readst; + TPMBACK_DEBUG("Backend state change %u/%u from=%d to=%d\n", (unsigned int) tpmif->domid, tpmif->handle, tpmif->state, state); + if (tpmif->state == state) + return 0; + + snprintf(path, 512, "backend/vtpm/%u/%u/state", (unsigned int) tpmif->domid, tpmif->handle); + + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Unable to read backend state %s, error was %s\n", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &tempst) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + readst = (enum xenbus_state) tempst; + free(value); + + /* It's possible that the backend state got updated by hotplug or something else behind our back */ + if(readst != tpmif->state) { + TPMBACK_DEBUG("tpm interface state was %d but xenstore state was %d!\n", tpmif->state, readst); + tpmif->state = readst; + } + + /* If the state isn't changing, don't update xenstore, because we don't want to fire extraneous events */ + if(tpmif->state == state) { + return 0; + } + + /*update xenstore*/ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_printf(XBT_NIL, path, "state", "%u", state))) { + TPMBACK_ERR("Error writing to xenstore %s, error was %s new state=%d\n", path, err, state); + free(err); + return -1; + } + + tpmif->state = state; + + return 0; +} +/********************************** + * TPMIF CREATION AND DELETION + * *******************************/ +static tpmif_t* __init_tpmif(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + tpmif = malloc(sizeof(*tpmif)); + tpmif->domid = domid; + tpmif->handle = handle; + tpmif->fe_path = NULL; + tpmif->fe_state_path = NULL; + tpmif->state = XenbusStateInitialising; + tpmif->status = DISCONNECTED; + tpmif->page = NULL; + tpmif->flags = 0; + tpmif->opaque = NULL; + memset(tpmif->uuid, 0, sizeof(tpmif->uuid)); + return tpmif; +} + +void __free_tpmif(tpmif_t* tpmif) +{ + if(tpmif->fe_path) { + free(tpmif->fe_path); + } + if(tpmif->fe_state_path) { + free(tpmif->fe_state_path); + } + free(tpmif); +}
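[Editor's note: tpmif_change_state() above deliberately re-reads the state node before writing: hotplug scripts can move the state behind the backend's back, and rewriting an unchanged value would fire a spurious watch event. The shape of the pattern reduced to a sketch, with the xenbus calls replaced by stand-in function pointers (illustrative, not part of the patch):]

    /* Sketch: read-compare-write, so xenstore always wins over the local
     * cache and no write happens when the value would not change. */
    static int set_state(int *cached, int target,
                         int (*read_node)(void), int (*write_node)(int))
    {
        int stored = read_node();   /* may differ from *cached (hotplug) */
        if (stored != *cached)
            *cached = stored;       /* trust xenstore over our cache */
        if (*cached == target)
            return 0;               /* no write, so no extraneous event */
        if (write_node(target))
            return -1;
        *cached = target;
        return 0;
    }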
+/* Creates a new tpm interface, adds it to the sorted array and returns it. + * returns NULL on error + * If the tpm interface already exists, it is returned*/ +tpmif_t* new_tpmif(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + char* err; + char path[512]; + + /* Make sure we haven't already created this tpm + * Double events can occur */ + if((tpmif = get_tpmif(domid, handle)) != NULL) { + return tpmif; + } + + tpmif = __init_tpmif(domid, handle); + + /* Get the uuid from xenstore */ + snprintf(path, 512, "backend/vtpm/%u/%u/uuid", (unsigned int) domid, handle); + if((!xenbus_read_uuid(path, tpmif->uuid))) { + TPMBACK_ERR("Error reading %s\n", path); + goto error; + } + + if(tpmif_change_state(tpmif, XenbusStateInitWait)) { + goto error; + } + + snprintf(path, 512, "backend/vtpm/%u/%u/frontend", (unsigned int) domid, handle); + if((err = xenbus_read(XBT_NIL, path, &tpmif->fe_path))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s), Error = %s", path, err); + free(err); + goto error; + } + + /*Set the state path */ + tpmif->fe_state_path = malloc(strlen(tpmif->fe_path) + 7); + strcpy(tpmif->fe_state_path, tpmif->fe_path); + strcat(tpmif->fe_state_path, "/state"); + + if(insert_tpmif(tpmif)) { + goto error; + } + TPMBACK_DEBUG("New tpmif %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + /* Do the callback now */ + if(gtpmdev.open_callback) { + gtpmdev.open_callback(tpmif->domid, tpmif->handle); + } + return tpmif; +error: + __free_tpmif(tpmif); + return NULL; + +} + +/* Removes tpmif from dev->tpmlist and frees its memory */ +void free_tpmif(tpmif_t* tpmif) +{ + char* err; + char path[512]; + TPMBACK_DEBUG("Free tpmif %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + if(tpmif->flags & TPMIF_CLOSED) { + TPMBACK_ERR("Tried to free an instance twice! There's a bug somewhere!\n"); + BUG(); + } + tpmif->flags = TPMIF_CLOSED; + + tpmif_change_state(tpmif, XenbusStateClosing); + + /* Unmap shared page and unbind event channel */ + if(tpmif->status == CONNECTED) { + tpmif->status = DISCONNECTING; + mask_evtchn(tpmif->evtchn); + + if(gntmap_munmap(&gtpmdev.map, (unsigned long)tpmif->page, 1)) { + TPMBACK_ERR("%u/%u Error occurred while trying to unmap shared page\n", (unsigned int) tpmif->domid, tpmif->handle); + } + + unbind_evtchn(tpmif->evtchn); + } + tpmif->status = DISCONNECTED; + tpmif_change_state(tpmif, XenbusStateClosed); + + /* Do the callback now */ + if(gtpmdev.close_callback) { + gtpmdev.close_callback(tpmif->domid, tpmif->handle); + } + + /* remove from array */ + remove_tpmif(tpmif); + + /* Wake up anyone possibly waiting on this interface and let them exit */ + wake_up(&waitq); + schedule(); + + /* Remove the old xenbus entries */ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_rm(XBT_NIL, path))) { + TPMBACK_ERR("Error cleaning up xenbus entries path=%s error=%s\n", path, err); + free(err); + } + + TPMBACK_LOG("Frontend %u/%u disconnected\n", (unsigned int) tpmif->domid, tpmif->handle); + + /* free memory */ + __free_tpmif(tpmif); + +}
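[Editor's note: free_tpmif() above is where the close_callback registered through init_tpmback() (defined further down in this file) fires, mirroring the open_callback invoked from new_tpmif(). A minimal consumer sketch; the callback names are hypothetical:]

    static void my_open(domid_t domid, unsigned int handle)
    {
        printk("vTPM frontend %u/%u appeared\n", (unsigned int) domid, handle);
    }

    static void my_close(domid_t domid, unsigned int handle)
    {
        printk("vTPM frontend %u/%u went away\n", (unsigned int) domid, handle);
    }

    /* during application startup: init_tpmback(my_open, my_close); */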
 + +/********************** + * REMAINING TPMBACK FUNCTIONS + * ********************/ + +/*Event channel handler */ +void tpmback_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + tpmif_t* tpmif = (tpmif_t*) data; + tpmif_shared_page_t *pg = tpmif->page; + + switch (pg->state) + { + case TPMIF_STATE_SUBMIT: + TPMBACK_DEBUG("EVENT CHANNEL FIRE %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + tpmif_req_ready(tpmif); + wake_up(&waitq); + break; + case TPMIF_STATE_CANCEL: + /* If we are busy with a request, do nothing */ + if (tpmif->flags & TPMIF_REQ_READY) + return; + /* Acknowledge the cancellation if we are idle */ + pg->state = TPMIF_STATE_IDLE; + wmb(); + notify_remote_via_evtchn(tpmif->evtchn); + return; + default: + /* Spurious wakeup; do nothing */ + return; + } +} + +/* Connect to frontend */ +int connect_fe(tpmif_t* tpmif) +{ + char path[512]; + char* err, *value; + uint32_t domid; + grant_ref_t ringref; + evtchn_port_t evtchn; + + /* If already connected then quit */ + if (tpmif->status == CONNECTED) { + TPMBACK_DEBUG("%u/%u tried to connect while it was already connected?\n", (unsigned int) tpmif->domid, tpmif->handle); + return 0; + } + + /* Fetch the grant reference */ + snprintf(path, 512, "%s/ring-ref", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s) Error = %s", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &ringref) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + free(value); + + + /* Fetch the event channel*/ + snprintf(path, 512, "%s/event-channel", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Error creating new tpm instance xenbus_read(%s) Error = %s", path, err); + free(err); + return -1; + } + if(sscanf(value, "%d", &evtchn) != 1) { + TPMBACK_ERR("Non integer value (%s) in %s ??\n", value, path); + free(value); + return -1; + } + free(value); + + /* Check that protocol v2 is being used */ + snprintf(path, 512, "%s/feature-protocol-v2", tpmif->fe_path); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMBACK_ERR("Unable to read %s during tpmback initialization! error = %s\n", path, err); + free(err); + return -1; + } + if(strcmp(value, "1")) { + TPMBACK_ERR("%s has an invalid value (%s)\n", path, value); + free(value); + return -1; + } + free(value); + + domid = tpmif->domid; + if((tpmif->page = gntmap_map_grant_refs(&gtpmdev.map, 1, &domid, 0, &ringref, PROT_READ | PROT_WRITE)) == NULL) { + TPMBACK_ERR("Failed to map grant reference %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + return -1; + } + + /*Bind the event channel */ + if((evtchn_bind_interdomain(tpmif->domid, evtchn, tpmback_handler, tpmif, &tpmif->evtchn))) + { + TPMBACK_ERR("%u/%u Unable to bind to interdomain event channel!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error_post_map; + } + unmask_evtchn(tpmif->evtchn); + + /* Write the ready flag and change status to connected */ + snprintf(path, 512, "backend/vtpm/%u/%u", (unsigned int) tpmif->domid, tpmif->handle); + if((err = xenbus_printf(XBT_NIL, path, "ready", "%u", 1))) { + TPMBACK_ERR("%u/%u Unable to write ready flag on connect_fe()\n", (unsigned int) tpmif->domid, tpmif->handle); + free(err); + goto error_post_evtchn; + } + tpmif->status = CONNECTED; + if((tpmif_change_state(tpmif, XenbusStateConnected))){ + goto error_post_evtchn; + } + + TPMBACK_LOG("Frontend %u/%u connected\n", (unsigned int) tpmif->domid, tpmif->handle); + + return 0; +error_post_evtchn: + mask_evtchn(tpmif->evtchn); + unbind_evtchn(tpmif->evtchn); +error_post_map: + gntmap_munmap(&gtpmdev.map, (unsigned long)tpmif->page, 1); + return -1; +}
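[Editor's note: connect_fe() above acquires its resources in a fixed order (map the grant, bind the event channel, write the ready flag) and on failure unwinds only what was already acquired, via the staged error_post_* labels. The idiom in isolation, with stand-in stubs (illustrative, not part of the patch):]

    static int acquire_a(void) { return 0; }  /* e.g. gntmap_map_grant_refs() */
    static int acquire_b(void) { return 0; }  /* e.g. evtchn_bind_interdomain() */
    static int acquire_c(void) { return 0; }  /* e.g. writing the ready flag */
    static void release_a(void) { }
    static void release_b(void) { }

    static int acquire_all(void)
    {
        if (acquire_a())
            return -1;        /* nothing to undo yet */
        if (acquire_b())
            goto undo_a;
        if (acquire_c())
            goto undo_b;
        return 0;
    undo_b:
        release_b();          /* fall through: undo in reverse order */
    undo_a:
        release_a();
        return -1;
    }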
 + +static void disconnect_fe(tpmif_t* tpmif) +{ + if (tpmif->status == CONNECTED) { + tpmif->status = DISCONNECTING; + mask_evtchn(tpmif->evtchn); + + if(gntmap_munmap(&gtpmdev.map, (unsigned long)tpmif->page, 1)) { + TPMBACK_ERR("%u/%u Error occurred while trying to unmap shared page\n", (unsigned int) tpmif->domid, tpmif->handle); + } + + unbind_evtchn(tpmif->evtchn); + } + tpmif->status = DISCONNECTED; + tpmif_change_state(tpmif, XenbusStateInitWait); + + TPMBACK_LOG("Frontend %u/%u disconnected\n", (unsigned int) tpmif->domid, tpmif->handle); +} + +static int frontend_changed(tpmif_t* tpmif) +{ + int state = xenbus_read_integer(tpmif->fe_state_path); + if(state < 0) { + state = XenbusStateUnknown; + } + + TPMBACK_DEBUG("Frontend %u/%u state changed to %d\n", (unsigned int) tpmif->domid, tpmif->handle, state); + + switch (state) { + case XenbusStateInitialising: + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + if(connect_fe(tpmif)) { + TPMBACK_ERR("Failed to connect to front end %u/%u\n", (unsigned int) tpmif->domid, tpmif->handle); + tpmif_change_state(tpmif, XenbusStateClosed); + return -1; + } + break; + + case XenbusStateClosing: + tpmif_change_state(tpmif, XenbusStateClosing); + break; + + case XenbusStateClosed: + disconnect_fe(tpmif); + break; + + case XenbusStateUnknown: /* keep it here */ + free_tpmif(tpmif); + break; + + default: + TPMBACK_DEBUG("BAD STATE CHANGE %u/%u state = %d for tpmif\n", (unsigned int) tpmif->domid, tpmif->handle, state); + return -1; + } + return 0; +} + + +/* parses the string that comes out of xenbus_watch_wait_return. */ +static int parse_eventstr(const char* evstr, domid_t* domid, unsigned int* handle) +{ + int ret; + char cmd[40]; + char* err; + char* value; + unsigned int udomid = 0; + tpmif_t* tpmif; + /* First check for new frontends, this occurs when /backend/vtpm/<domid>/<handle> gets created. Note we want the sscanf to fail on the last %s */ + if (sscanf(evstr, "backend/vtpm/%u/%u/%40s", &udomid, handle, cmd) == 2) { + *domid = udomid; + /* Make sure the entry exists, if this event triggers because the entry disappeared then ignore it */ + if((err = xenbus_read(XBT_NIL, evstr, &value))) { + free(err); + return EV_NONE; + } + free(value); + /* Make sure the tpmif entry does not already exist, this should not happen */ + if((tpmif = get_tpmif(*domid, *handle)) != NULL) { + TPMBACK_DEBUG("Duplicate tpm entries! %u %u\n", tpmif->domid, tpmif->handle); + return EV_NONE; + } + return EV_NEWFE; + } else if((ret = sscanf(evstr, "/local/domain/%u/device/vtpm/%u/%40s", &udomid, handle, cmd)) == 3) { + *domid = udomid; + if (!strcmp(cmd, "state")) + return EV_STCHNG; + } + return EV_NONE; +} + +void handle_backend_event(char* evstr) { + tpmif_t* tpmif; + domid_t domid; + unsigned int handle; + int event; + + TPMBACK_DEBUG("Xenbus Event: %s\n", evstr); + + event = parse_eventstr(evstr, &domid, &handle); + + switch(event) { + case EV_NEWFE: + if(new_tpmif(domid, handle) == NULL) { + TPMBACK_ERR("Failed to create new tpm instance %u/%u\n", (unsigned int) domid, handle); + } + wake_up(&waitq); + break; + case EV_STCHNG: + if((tpmif = get_tpmif(domid, handle))) { + frontend_changed(tpmif); + } else { + TPMBACK_DEBUG("Event received for non-existent tpm! instance=%u/%u xenbus_event=%s\n", (unsigned int) domid, handle, evstr); + } + break; + } +}
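[Editor's note: the `== 2` comparison in parse_eventstr() above is load-bearing. For the interface node backend/vtpm/<domid>/<handle> itself, the trailing %40s has nothing left to match, so sscanf() reports two conversions, while any deeper child path converts all three fields. A standalone check of that behaviour in plain C, assuming standard sscanf() semantics:]

    #include <stdio.h>

    int main(void)
    {
        unsigned int domid, handle;
        char cmd[41]; /* room for %40s plus the terminating NUL */

        /* The interface node itself: only two conversions succeed. */
        printf("%d\n", sscanf("backend/vtpm/3/0",
               "backend/vtpm/%u/%u/%40s", &domid, &handle, cmd)); /* prints 2 */

        /* A child node such as .../state: all three convert. */
        printf("%d\n", sscanf("backend/vtpm/3/0/state",
               "backend/vtpm/%u/%u/%40s", &domid, &handle, cmd)); /* prints 3 */
        return 0;
    }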
+ * @path - xenstore path to scan */ +static void generate_backend_events(const char* path) +{ + char* err; + int i, len; + char **dirs; + char *entry; + + if((err = xenbus_ls(XBT_NIL, path, &dirs)) != NULL) { + free(err); + return; + } + + for(i = 0; dirs[i] != NULL; ++i) { + len = strlen(path) + strlen(dirs[i]) + 2; + entry = malloc(len); + snprintf(entry, len, "%s/%s", path, dirs[i]); + + /* Generate and handle event for the entry itself */ + handle_backend_event(entry); + + /* Do children */ + generate_backend_events(entry); + + /* Cleanup */ + free(entry); + free(dirs[i]); + } + free(dirs); + return; +} + +void* tpmback_get_opaque(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_opaque() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return NULL; + } + + return tpmif->opaque; +} + +int tpmback_set_opaque(domid_t domid, unsigned int handle, void *opaque) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("set_opaque() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return -1; + } + + tpmif->opaque = opaque; + return 0; +} + +unsigned char* tpmback_get_uuid(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_uuid() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return NULL; + } + + return tpmif->uuid; +} + +int tpmback_get_peercontext(domid_t domid, unsigned int handle, void* buffer, int buflen) +{ + tpmif_t* tpmif; + if((tpmif = get_tpmif(domid, handle)) == NULL) { + TPMBACK_DEBUG("get_peercontext() failed, %u/%u is an invalid frontend\n", (unsigned int) domid, handle); + return -1; + } + + return evtchn_get_peercontext(tpmif->evtchn, buffer, buflen); +} + +static void event_listener(void) +{ + const char* bepath = "backend/vtpm"; + char **path; + char* err; + + /* Setup the backend device watch */ + if((err = xenbus_watch_path_token(XBT_NIL, bepath, bepath, &gtpmdev.events)) != NULL) { + TPMBACK_ERR("xenbus_watch_path_token(%s) failed with error %s!\n", bepath, err); + free(err); + goto egress; + } + + /* Check for any frontends that connected before we set the watch. + * This is almost guaranteed to happen if both domains are started + * immediately one after the other. + * We do this by manually generating events on everything in the backend + * path */ + generate_backend_events(bepath); + + /* Wait and listen for changes in frontend connections */ + while(1) { + path = xenbus_wait_for_watch_return(&gtpmdev.events); + + /*If quit flag was set then exit */ + if(gtpmdev.flags & TPMIF_CLOSED) { + TPMBACK_DEBUG("listener thread got quit event. 
Exiting..\n"); + free(path); + break; + } + handle_backend_event(*path); + free(path); + + } + + if((err = xenbus_unwatch_path_token(XBT_NIL, bepath, bepath)) != NULL) { + free(err); + } +egress: + return; +} + +void event_thread(void* p) { + event_listener(); +} + +void init_tpmback(void (*open_cb)(domid_t, unsigned int), void (*close_cb)(domid_t, unsigned int)) +{ + if(!globalinit) { + init_waitqueue_head(&waitq); + globalinit = 1; + } + printk("============= Init TPM BACK ================\n"); + gtpmdev.tpmlist = malloc(sizeof(tpmif_t*) * DEF_ARRAY_SIZE); + gtpmdev.num_alloc = DEF_ARRAY_SIZE; + gtpmdev.num_tpms = 0; + gtpmdev.flags = 0; + + gtpmdev.open_callback = open_cb; + gtpmdev.close_callback = close_cb; + + eventthread = create_thread("tpmback-listener", event_thread, NULL); + +} + +void shutdown_tpmback(void) +{ + TPMBACK_LOG("Shutting down tpm backend\n"); + /* Set the quit flag */ + gtpmdev.flags = TPMIF_CLOSED; + + //printk("num tpms is %d\n", gtpmdev.num_tpms); + /*Free all backend instances */ + while(gtpmdev.num_tpms) { + free_tpmif(gtpmdev.tpmlist[0]); + } + free(gtpmdev.tpmlist); + gtpmdev.tpmlist = NULL; + gtpmdev.num_alloc = 0; + + /* Wake up anyone possibly waiting on the device and let them exit */ + wake_up(&waitq); + schedule(); +} + +static void init_tpmcmd(tpmcmd_t* tpmcmd, domid_t domid, unsigned int handle, void *opaque) +{ + tpmcmd->domid = domid; + tpmcmd->locality = -1; + tpmcmd->handle = handle; + tpmcmd->opaque = opaque; + tpmcmd->req = NULL; + tpmcmd->req_len = 0; + tpmcmd->resp = NULL; + tpmcmd->resp_len = 0; +} + +tpmcmd_t* get_request(tpmif_t* tpmif) { + tpmcmd_t* cmd; + tpmif_shared_page_t *shr; + unsigned int offset; + int flags; +#ifdef TPMBACK_PRINT_DEBUG + int i; +#endif + + local_irq_save(flags); + + /* Allocate the cmd object to hold the data */ + if((cmd = malloc(sizeof(*cmd))) == NULL) { + goto error; + } + init_tpmcmd(cmd, tpmif->domid, tpmif->handle, tpmif->opaque); + + shr = tpmif->page; + cmd->req_len = shr->length; + cmd->locality = shr->locality; + offset = sizeof(*shr) + 4*shr->nr_extra_pages; + if (offset > PAGE_SIZE || offset + cmd->req_len > PAGE_SIZE) { + TPMBACK_ERR("%u/%u Command size too long for shared page!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + /* Allocate the buffer */ + if(cmd->req_len) { + if((cmd->req = malloc(cmd->req_len)) == NULL) { + goto error; + } + } + /* Copy the bits from the shared page(s) */ + memcpy(cmd->req, offset + (uint8_t*)shr, cmd->req_len); + +#ifdef TPMBACK_PRINT_DEBUG + TPMBACK_DEBUG("Received Tpm Command from %u/%u of size %u", (unsigned int) tpmif->domid, tpmif->handle, cmd->req_len); + for(i = 0; i < cmd->req_len; ++i) { + if (!(i % 30)) { + TPMBACK_DEBUG_MORE("\n"); + } + TPMBACK_DEBUG_MORE("%02hhX ", cmd->req[i]); + } + TPMBACK_DEBUG_MORE("\n\n"); +#endif + + local_irq_restore(flags); + return cmd; +error: + if(cmd != NULL) { + if (cmd->req != NULL) { + free(cmd->req); + cmd->req = NULL; + } + free(cmd); + cmd = NULL; + } + local_irq_restore(flags); + return NULL; + +} + +void send_response(tpmcmd_t* cmd, tpmif_t* tpmif) +{ + tpmif_shared_page_t *shr; + unsigned int offset; + int flags; +#ifdef TPMBACK_PRINT_DEBUG +int i; +#endif + + local_irq_save(flags); + + shr = tpmif->page; + shr->length = cmd->resp_len; + + offset = sizeof(*shr) + 4*shr->nr_extra_pages; + if (offset > PAGE_SIZE || offset + cmd->resp_len > PAGE_SIZE) { + TPMBACK_ERR("%u/%u Command size too long for shared page!\n", (unsigned int) tpmif->domid, tpmif->handle); + goto error; + } + memcpy(offset + 
(uint8_t*)shr, cmd->resp, cmd->resp_len); + +#ifdef TPMBACK_PRINT_DEBUG + TPMBACK_DEBUG("Sent response to %u/%u of size %u", (unsigned int) tpmif->domid, tpmif->handle, cmd->resp_len); + for(i = 0; i < cmd->resp_len; ++i) { + if (!(i % 30)) { + TPMBACK_DEBUG_MORE("\n"); + } + TPMBACK_DEBUG_MORE("%02hhX ", cmd->resp[i]); + } + TPMBACK_DEBUG_MORE("\n\n"); +#endif + /* clear the ready flag and send the event channel notice to the frontend */ + tpmif_req_finished(tpmif); + barrier(); + shr->state = TPMIF_STATE_FINISH; + wmb(); + notify_remote_via_evtchn(tpmif->evtchn); +error: + local_irq_restore(flags); + return; +} + +tpmcmd_t* tpmback_req_any(void) +{ + int i; + /* Block until something has a request */ + wait_event(waitq, (gtpmdev.flags & (TPMIF_REQ_READY | TPMIF_CLOSED))); + + /* Check if we're shutting down */ + if(gtpmdev.flags & TPMIF_CLOSED) { + /* if something was waiting for us to give up the queue so it can shutdown, let it finish */ + schedule(); + return NULL; + } + + for(i = 0; i < gtpmdev.num_tpms; ++i) { + if(gtpmdev.tpmlist[i]->flags & TPMIF_REQ_READY) { + return get_request(gtpmdev.tpmlist[i]); + } + } + + TPMBACK_ERR("backend request ready flag was set but no interfaces were actually ready\n"); + return NULL; +} + +tpmcmd_t* tpmback_req(domid_t domid, unsigned int handle) +{ + tpmif_t* tpmif; + tpmif = get_tpmif(domid, handle); + if(tpmif == NULL) { + return NULL; + } + + wait_event(waitq, (tpmif->flags & (TPMIF_REQ_READY | TPMIF_CLOSED) || gtpmdev.flags & TPMIF_CLOSED)); + + /* Check if we're shutting down */ + if(tpmif->flags & TPMIF_CLOSED || gtpmdev.flags & TPMIF_CLOSED) { + /* if something was waiting for us to give up the queue so it can free this instance, let it finish */ + schedule(); + return NULL; + } + + return get_request(tpmif); +} + +void tpmback_resp(tpmcmd_t* tpmcmd) +{ + tpmif_t* tpmif; + + /* Get the associated interface, if it doesn't exist then just quit */ + tpmif = get_tpmif(tpmcmd->domid, tpmcmd->handle); + if(tpmif == NULL) { + TPMBACK_ERR("Tried to send a response to non-existent frontend %u/%u\n", (unsigned int) tpmcmd->domid, tpmcmd->handle); + goto end; + } + + if(!(tpmif->flags & TPMIF_REQ_READY)) { + TPMBACK_ERR("Tried to send response to a frontend that was not waiting for one %u/%u\n", (unsigned int) tpmcmd->domid, tpmcmd->handle); + goto end; + } + + /* Send response to frontend */ + send_response(tpmcmd, tpmif); + +end: + if(tpmcmd->req != NULL) { + free(tpmcmd->req); + } + free(tpmcmd); + return; +} + +int tpmback_wait_for_frontend_connect(domid_t *domid, unsigned int *handle) +{ + tpmif_t* tpmif; + int flags; + wait_event(waitq, ((gtpmdev.num_tpms > 0) || gtpmdev.flags & TPMIF_CLOSED)); + if(gtpmdev.flags & TPMIF_CLOSED) { + return -1; + } + local_irq_save(flags); + tpmif = gtpmdev.tpmlist[0]; + *domid = tpmif->domid; + *handle = tpmif->handle; + local_irq_restore(flags); + + return 0; +} + +int tpmback_num_frontends(void) +{ + return gtpmdev.num_tpms; +} diff -Nru xen-4.6.0/extras/mini-os/tpmfront.c xen-4.6.5/extras/mini-os/tpmfront.c --- xen-4.6.0/extras/mini-os/tpmfront.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/tpmfront.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,631 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. 
+ * + * This code has been derived from drivers/char/tpm_vtpm.c + * from the xen 2.6.18 linux kernel + * + * Copyright (C) 2006 IBM Corporation + * + * This code has also been derived from drivers/char/tpm_xen.c + * from the xen 2.6.18 linux kernel + * + * Copyright (c) 2005, IBM Corporation + * + * which was itself derived from drivers/xen/netfront/netfront.c + * from the linux kernel + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define TPMFRONT_PRINT_DEBUG +#ifdef TPMFRONT_PRINT_DEBUG +#define TPMFRONT_DEBUG(fmt,...) printk("Tpmfront:Debug("__FILE__":%d) " fmt, __LINE__, ##__VA_ARGS__) +#define TPMFRONT_DEBUG_MORE(fmt,...) printk(fmt, ##__VA_ARGS__) +#else +#define TPMFRONT_DEBUG(fmt,...) +#endif +#define TPMFRONT_ERR(fmt,...) printk("Tpmfront:Error " fmt, ##__VA_ARGS__) +#define TPMFRONT_LOG(fmt,...) printk("Tpmfront:Info " fmt, ##__VA_ARGS__) + +#define min(a,b) (((a) < (b)) ? (a) : (b)) + +void tpmfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) { + struct tpmfront_dev* dev = (struct tpmfront_dev*) data; + tpmif_shared_page_t *shr = dev->page; + /*If we get a response when we didnt make a request, just ignore it */ + if(!dev->waiting) { + return; + } + + switch (shr->state) { + case TPMIF_STATE_FINISH: /* request was completed */ + case TPMIF_STATE_IDLE: /* request was cancelled */ + break; + default: + /* Spurious wakeup; do nothing, request is still pending */ + return; + } + + dev->waiting = 0; +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].read = 1; + } +#endif + wake_up(&dev->waitq); +} + +static int publish_xenbus(struct tpmfront_dev* dev) { + xenbus_transaction_t xbt; + int retry; + char* err; + /* Write the grant reference and event channel to xenstore */ +again: + if((err = xenbus_transaction_start(&xbt))) { + TPMFRONT_ERR("Unable to start xenbus transaction, error was %s\n", err); + free(err); + return -1; + } + + if((err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", (unsigned int) dev->ring_ref))) { + TPMFRONT_ERR("Unable to write %s/ring-ref, error was %s\n", dev->nodename, err); + free(err); + goto abort_transaction; + } + + if((err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", (unsigned int) dev->evtchn))) { + TPMFRONT_ERR("Unable to write %s/event-channel, error was %s\n", dev->nodename, err); + free(err); + goto abort_transaction; + } + + if((err = xenbus_transaction_end(xbt, 0, &retry))) { + TPMFRONT_ERR("Unable to complete xenbus transaction, error was %s\n", err); + free(err); + return -1; + } + if(retry) { + goto again; + } + + return 0; +abort_transaction: + if((err = xenbus_transaction_end(xbt, 1, &retry))) { + free(err); + } + return -1; +} + +static int wait_for_backend_connect(xenbus_event_queue* events, char* path) +{ + int state; + + TPMFRONT_LOG("Waiting for backend connection..\n"); + /* Wait for the backend to connect */ + while(1) { + state = xenbus_read_integer(path); + if ( state < 0) + state = XenbusStateUnknown; + switch(state) { + /* Bad states, we quit with error */ + case XenbusStateUnknown: + case XenbusStateClosing: + case XenbusStateClosed: + TPMFRONT_ERR("Unable to connect to backend\n"); + return -1; + /* If backend is connected then break out of loop */ + case 
XenbusStateConnected: + TPMFRONT_LOG("Backend Connected\n"); + return 0; + default: + xenbus_wait_for_watch(events); + } + } + +} + +static int wait_for_backend_closed(xenbus_event_queue* events, char* path) +{ + int state; + + TPMFRONT_LOG("Waiting for backend to close..\n"); + while(1) { + state = xenbus_read_integer(path); + if ( state < 0) + state = XenbusStateUnknown; + switch(state) { + case XenbusStateUnknown: + TPMFRONT_ERR("Backend Unknown state, forcing shutdown\n"); + return -1; + case XenbusStateClosed: + TPMFRONT_LOG("Backend Closed\n"); + return 0; + case XenbusStateInitWait: + TPMFRONT_LOG("Backend Closed (waiting for reconnect)\n"); + return 0; + default: + xenbus_wait_for_watch(events); + } + } + +} + +static int wait_for_backend_state_changed(struct tpmfront_dev* dev, XenbusState state) { + char* err; + int ret = 0; + xenbus_event_queue events = NULL; + char path[512]; + + snprintf(path, 512, "%s/state", dev->bepath); + /*Setup the watch to wait for the backend */ + if((err = xenbus_watch_path_token(XBT_NIL, path, path, &events))) { + TPMFRONT_ERR("Could not set a watch on %s, error was %s\n", path, err); + free(err); + return -1; + } + + /* Do the actual wait loop now */ + switch(state) { + case XenbusStateConnected: + ret = wait_for_backend_connect(&events, path); + break; + case XenbusStateClosed: + ret = wait_for_backend_closed(&events, path); + break; + default: + TPMFRONT_ERR("Bad wait state %d, ignoring\n", state); + } + + if((err = xenbus_unwatch_path_token(XBT_NIL, path, path))) { + TPMFRONT_ERR("Unable to unwatch %s, error was %s, ignoring..\n", path, err); + free(err); + } + return ret; +} + +static int tpmfront_connect(struct tpmfront_dev* dev) +{ + char* err; + /* Create shared page */ + dev->page = (tpmif_shared_page_t *)alloc_page(); + if(dev->page == NULL) { + TPMFRONT_ERR("Unable to allocate page for shared memory\n"); + goto error; + } + memset(dev->page, 0, PAGE_SIZE); + dev->ring_ref = gnttab_grant_access(dev->bedomid, virt_to_mfn(dev->page), 0); + TPMFRONT_DEBUG("grant ref is %lu\n", (unsigned long) dev->ring_ref); + + /*Create event channel */ + if(evtchn_alloc_unbound(dev->bedomid, tpmfront_handler, dev, &dev->evtchn)) { + TPMFRONT_ERR("Unable to allocate event channel\n"); + goto error_postmap; + } + unmask_evtchn(dev->evtchn); + TPMFRONT_DEBUG("event channel is %lu\n", (unsigned long) dev->evtchn); + + /* Write the entries to xenstore */ + if(publish_xenbus(dev)) { + goto error_postevtchn; + } + + /* Change state to connected */ + dev->state = XenbusStateConnected; + + /* Tell the backend that we are ready */ + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", dev->state))) { + TPMFRONT_ERR("Unable to write to xenstore %s/state, value=%u", dev->nodename, XenbusStateConnected); + free(err); + goto error; + } + + return 0; +error_postevtchn: + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); +error_postmap: + gnttab_end_access(dev->ring_ref); + free_page(dev->page); +error: + return -1; +} + +struct tpmfront_dev* init_tpmfront(const char* _nodename) +{ + struct tpmfront_dev* dev; + const char* nodename; + char path[512]; + char* value, *err; + unsigned long long ival; + + printk("============= Init TPM Front ================\n"); + + dev = malloc(sizeof(struct tpmfront_dev)); + memset(dev, 0, sizeof(struct tpmfront_dev)); + +#ifdef HAVE_LIBC + dev->fd = -1; +#endif + + nodename = _nodename ? 
_nodename : "device/vtpm/0"; + dev->nodename = strdup(nodename); + + init_waitqueue_head(&dev->waitq); + + /* Get backend domid */ + snprintf(path, 512, "%s/backend-id", dev->nodename); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + if(sscanf(value, "%llu", &ival) != 1) { + TPMFRONT_ERR("%s has non-integer value (%s)\n", path, value); + free(value); + goto error; + } + free(value); + dev->bedomid = ival; + + /* Get backend xenstore path */ + snprintf(path, 512, "%s/backend", dev->nodename); + if((err = xenbus_read(XBT_NIL, path, &dev->bepath))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + + /* Publish protocol v2 feature */ + snprintf(path, 512, "%s/feature-protocol-v2", dev->nodename); + if ((err = xenbus_write(XBT_NIL, path, "1"))) + { + TPMFRONT_ERR("Unable to write feature-protocol-v2 node: %s\n", err); + free(err); + goto error; + } + + /* Create and publish grant reference and event channel */ + if (tpmfront_connect(dev)) { + goto error; + } + + /* Wait for backend to connect */ + if( wait_for_backend_state_changed(dev, XenbusStateConnected)) { + goto error; + } + + /* Ensure backend is also using protocol v2 */ + snprintf(path, 512, "%s/feature-protocol-v2", dev->bepath); + if((err = xenbus_read(XBT_NIL, path, &value))) { + TPMFRONT_ERR("Unable to read %s during tpmfront initialization! error = %s\n", path, err); + free(err); + goto error; + } + if(strcmp(value, "1")) { + TPMFRONT_ERR("%s has an invalid value (%s)\n", path, value); + free(value); + goto error; + } + free(value); + + TPMFRONT_LOG("Initialization Completed successfully\n"); + + return dev; + +error: + shutdown_tpmfront(dev); + return NULL; +} +void shutdown_tpmfront(struct tpmfront_dev* dev) +{ + char* err; + char path[512]; + if(dev == NULL) { + return; + } + TPMFRONT_LOG("Shutting down tpmfront\n"); + /* disconnect */ + if(dev->state == XenbusStateConnected) { + /* Tell backend we are closing */ + dev->state = XenbusStateClosing; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Clean up xenstore entries */ + snprintf(path, 512, "%s/event-channel", dev->nodename); + if((err = xenbus_rm(XBT_NIL, path))) { + free(err); + } + snprintf(path, 512, "%s/ring-ref", dev->nodename); + if((err = xenbus_rm(XBT_NIL, path))) { + free(err); + } + + /* Tell backend we are closed */ + dev->state = XenbusStateClosed; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Wait for the backend to close and unmap shared pages, ignore any errors */ + wait_for_backend_state_changed(dev, XenbusStateClosed); + + /* Prepare for a later reopen (possibly by a kexec'd kernel) */ + dev->state = XenbusStateInitialising; + if((err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%u", (unsigned int) dev->state))) { + TPMFRONT_ERR("Unable to write to %s, error was %s", dev->nodename, err); + free(err); + } + + /* Close event channel and unmap shared page */ + mask_evtchn(dev->evtchn); + unbind_evtchn(dev->evtchn); + gnttab_end_access(dev->ring_ref); + + free_page(dev->page); + } + + /* Cleanup memory usage */ + if(dev->respbuf) { + free(dev->respbuf); + } + 
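/* Teardown note: respbuf was allocated by tpmfront_recv(), while bepath came from xenbus_read() and nodename from strdup() in init_tpmfront(); all three are released below along with the device structure itself. */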
if(dev->bepath) { + free(dev->bepath); + } + if(dev->nodename) { + free(dev->nodename); + } + free(dev); +} + +int tpmfront_send(struct tpmfront_dev* dev, const uint8_t* msg, size_t length) +{ + unsigned int offset; + tpmif_shared_page_t *shr = NULL; +#ifdef TPMFRONT_PRINT_DEBUG + int i; +#endif + /* Error Checking */ + if(dev == NULL || dev->state != XenbusStateConnected) { + TPMFRONT_ERR("Tried to send message through disconnected frontend\n"); + return -1; + } + shr = dev->page; + +#ifdef TPMFRONT_PRINT_DEBUG + TPMFRONT_DEBUG("Sending Msg to backend size=%u", (unsigned int) length); + for(i = 0; i < length; ++i) { + if(!(i % 30)) { + TPMFRONT_DEBUG_MORE("\n"); + } + TPMFRONT_DEBUG_MORE("%02X ", msg[i]); + } + TPMFRONT_DEBUG_MORE("\n"); +#endif + + /* Copy to shared pages now */ + offset = sizeof(*shr); + if (length + offset > PAGE_SIZE) { + TPMFRONT_ERR("Message too long for shared page\n"); + return -1; + } + memcpy(offset + (uint8_t*)shr, msg, length); + shr->length = length; + barrier(); + shr->state = TPMIF_STATE_SUBMIT; + + dev->waiting = 1; + dev->resplen = 0; +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].read = 0; + files[dev->fd].tpmfront.respgot = 0; + files[dev->fd].tpmfront.offset = 0; + } +#endif + wmb(); + notify_remote_via_evtchn(dev->evtchn); + return 0; +} +int tpmfront_recv(struct tpmfront_dev* dev, uint8_t** msg, size_t *length) +{ + unsigned int offset; + tpmif_shared_page_t *shr = NULL; +#ifdef TPMFRONT_PRINT_DEBUG +int i; +#endif + if(dev == NULL || dev->state != XenbusStateConnected) { + TPMFRONT_ERR("Tried to receive message from disconnected frontend\n"); + return -1; + } + /*Wait for the response */ + wait_event(dev->waitq, (!dev->waiting)); + shr = dev->page; + + /* Initialize */ + *msg = NULL; + *length = 0; + offset = sizeof(*shr); + + if (shr->state != TPMIF_STATE_FINISH) + goto quit; + + *length = shr->length; + + if (*length + offset > PAGE_SIZE) { + TPMFRONT_ERR("Reply too long for shared page\n"); + return -1; + } + + /* Alloc the buffer */ + if(dev->respbuf) { + free(dev->respbuf); + } + *msg = dev->respbuf = malloc(*length); + dev->resplen = *length; + + /* Copy the bits */ + memcpy(*msg, offset + (uint8_t*)shr, *length); + +#ifdef TPMFRONT_PRINT_DEBUG + TPMFRONT_DEBUG("Received response from backend size=%u", (unsigned int) *length); + for(i = 0; i < *length; ++i) { + if(!(i % 30)) { + TPMFRONT_DEBUG_MORE("\n"); + } + TPMFRONT_DEBUG_MORE("%02X ", (*msg)[i]); + } + TPMFRONT_DEBUG_MORE("\n"); +#endif +#ifdef HAVE_LIBC + if(dev->fd >= 0) { + files[dev->fd].tpmfront.respgot = 1; + } +#endif +quit: + return 0; +} + +int tpmfront_cmd(struct tpmfront_dev* dev, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen) +{ + int rc; + if((rc = tpmfront_send(dev, req, reqlen))) { + return rc; + } + if((rc = tpmfront_recv(dev, resp, resplen))) { + return rc; + } + + return 0; +} + +int tpmfront_set_locality(struct tpmfront_dev* dev, int locality) +{ + if (!dev || !dev->page) + return -1; + dev->page->locality = locality; + return 0; +} + +#ifdef HAVE_LIBC +#include +int tpmfront_open(struct tpmfront_dev* dev) +{ + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } + + dev->fd = alloc_fd(FTYPE_TPMFRONT); + printk("tpmfront_open(%s) -> %d\n", dev->nodename, dev->fd); + files[dev->fd].tpmfront.dev = dev; + files[dev->fd].tpmfront.offset = 0; + files[dev->fd].tpmfront.respgot = 0; + return dev->fd; +} + +int tpmfront_posix_write(int fd, const uint8_t* buf, size_t count) +{ + int rc; + struct tpmfront_dev* dev; + dev = 
files[fd].tpmfront.dev; + + if(count == 0) { + return 0; + } + + /* Return an error if we are already processing a command */ + if(dev->waiting) { + errno = EINPROGRESS; + return -1; + } + /* Send the command now */ + if((rc = tpmfront_send(dev, buf, count)) != 0) { + errno = EIO; + return -1; + } + return count; +} + +int tpmfront_posix_read(int fd, uint8_t* buf, size_t count) +{ + int rc; + uint8_t* dummybuf; + size_t dummysz; + struct tpmfront_dev* dev; + + dev = files[fd].tpmfront.dev; + + if(count == 0) { + return 0; + } + + /* get the response if we haven't already */ + if(files[dev->fd].tpmfront.respgot == 0) { + if ((rc = tpmfront_recv(dev, &dummybuf, &dummysz)) != 0) { + errno = EIO; + return -1; + } + } + + /* handle EOF case */ + if(files[dev->fd].tpmfront.offset >= dev->resplen) { + return 0; + } + + /* Compute the number of bytes and do the copy operation */ + if((rc = min(count, dev->resplen - files[dev->fd].tpmfront.offset)) != 0) { + memcpy(buf, dev->respbuf + files[dev->fd].tpmfront.offset, rc); + files[dev->fd].tpmfront.offset += rc; + } + + return rc; +} + +int tpmfront_posix_fstat(int fd, struct stat* buf) +{ + uint8_t* dummybuf; + size_t dummysz; + int rc; + struct tpmfront_dev* dev = files[fd].tpmfront.dev; + + /* If we have a response waiting, then read it now from the backend + * so we can get its length*/ + if(dev->waiting || (files[dev->fd].read == 1 && !files[dev->fd].tpmfront.respgot)) { + if ((rc = tpmfront_recv(dev, &dummybuf, &dummysz)) != 0) { + errno = EIO; + return -1; + } + } + + buf->st_mode = O_RDWR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->resplen; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} + + +#endif diff -Nru xen-4.6.0/extras/mini-os/tpm_tis.c xen-4.6.5/extras/mini-os/tpm_tis.c --- xen-4.6.0/extras/mini-os/tpm_tis.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.6.5/extras/mini-os/tpm_tis.c 2016-05-17 15:32:08.000000000 +0000 @@ -0,0 +1,1523 @@ +/* + * Copyright (c) 2010-2012 United States Government, as represented by + * the Secretary of Defense. All rights reserved. + * + * This code has been derived from drivers/char/tpm.c + * from the linux kernel + * + * Copyright (C) 2004 IBM Corporation + * + * This code has also been derived from drivers/char/tpm/tpm_tis.c + * from the linux kernel + * + * Copyright (C) 2005, 2006 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef min + #define min( a, b ) ( ((a) < (b)) ? 
(a) : (b) ) +#endif +#define ADJUST_TIMEOUTS_TO_STANDARD(initial,standard,timeout_no) \ + if((initial) < (standard)){ \ + (initial) = (standard); \ + printk("Timeout %c was adjusted to standard value.\n",timeout_no); \ + } + +#define TPM_HEADER_SIZE 10 + +#define TPM_BUFSIZE 2048 + +struct tpm_input_header { + uint16_t tag; + uint32_t length; + uint32_t ordinal; +}__attribute__((packed)); + +struct tpm_output_header { + uint16_t tag; + uint32_t length; + uint32_t return_code; +}__attribute__((packed)); + +struct stclear_flags_t { + uint16_t tag; + uint8_t deactivated; + uint8_t disableForceClear; + uint8_t physicalPresence; + uint8_t physicalPresenceLock; + uint8_t bGlobalLock; +}__attribute__((packed)); + +struct tpm_version_t { + uint8_t Major; + uint8_t Minor; + uint8_t revMajor; + uint8_t revMinor; +}__attribute__((packed)); + +struct tpm_version_1_2_t { + uint16_t tag; + uint8_t Major; + uint8_t Minor; + uint8_t revMajor; + uint8_t revMinor; +}__attribute__((packed)); + +struct timeout_t { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +}__attribute__((packed)); + +struct duration_t { + uint32_t tpm_short; + uint32_t tpm_medium; + uint32_t tpm_long; +}__attribute__((packed)); + +struct permanent_flags_t { + uint16_t tag; + uint8_t disable; + uint8_t ownership; + uint8_t deactivated; + uint8_t readPubek; + uint8_t disableOwnerClear; + uint8_t allowMaintenance; + uint8_t physicalPresenceLifetimeLock; + uint8_t physicalPresenceHWEnable; + uint8_t physicalPresenceCMDEnable; + uint8_t CEKPUsed; + uint8_t TPMpost; + uint8_t TPMpostLock; + uint8_t FIPS; + uint8_t operator; + uint8_t enableRevokeEK; + uint8_t nvLocked; + uint8_t readSRKPub; + uint8_t tpmEstablished; + uint8_t maintenanceDone; + uint8_t disableFullDALogicInfo; +}__attribute__((packed)); + +typedef union { + struct permanent_flags_t perm_flags; + struct stclear_flags_t stclear_flags; + bool owned; + uint32_t num_pcrs; + struct tpm_version_t tpm_version; + struct tpm_version_1_2_t tpm_version_1_2; + uint32_t manufacturer_id; + struct timeout_t timeout; + struct duration_t duration; +} cap_t; + +struct tpm_getcap_params_in { + uint32_t cap; + uint32_t subcap_size; + uint32_t subcap; +}__attribute__((packed)); + +struct tpm_getcap_params_out { + uint32_t cap_size; + cap_t cap; +}__attribute__((packed)); + +struct tpm_readpubek_params_out { + uint8_t algorithm[4]; + uint8_t encscheme[2]; + uint8_t sigscheme[2]; + uint32_t paramsize; + uint8_t parameters[12]; /*assuming RSA*/ + uint32_t keysize; + uint8_t modulus[256]; + uint8_t checksum[20]; +}__attribute__((packed)); + +typedef union { + struct tpm_input_header in; + struct tpm_output_header out; +} tpm_cmd_header; + +#define TPM_DIGEST_SIZE 20 +struct tpm_pcrread_out { + uint8_t pcr_result[TPM_DIGEST_SIZE]; +}__attribute__((packed)); + +struct tpm_pcrread_in { + uint32_t pcr_idx; +}__attribute__((packed)); + +struct tpm_pcrextend_in { + uint32_t pcr_idx; + uint8_t hash[TPM_DIGEST_SIZE]; +}__attribute__((packed)); + +typedef union { + struct tpm_getcap_params_out getcap_out; + struct tpm_readpubek_params_out readpubek_out; + uint8_t readpubek_out_buffer[sizeof(struct tpm_readpubek_params_out)]; + struct tpm_getcap_params_in getcap_in; + struct tpm_pcrread_in pcrread_in; + struct tpm_pcrread_out pcrread_out; + struct tpm_pcrextend_in pcrextend_in; +} tpm_cmd_params; + +struct tpm_cmd_t { + tpm_cmd_header header; + tpm_cmd_params params; +}__attribute__((packed)); + + +enum tpm_duration { + TPM_SHORT = 0, + TPM_MEDIUM = 1, + TPM_LONG = 2, + TPM_UNDEFINED, +}; + +#define 
TPM_MAX_ORDINAL 243 +#define TPM_MAX_PROTECTED_ORDINAL 12 +#define TPM_PROTECTED_ORDINAL_MASK 0xFF + +extern const uint8_t tpm_protected_ordinal_duration[TPM_MAX_PROTECTED_ORDINAL]; +extern const uint8_t tpm_ordinal_duration[TPM_MAX_ORDINAL]; + +#define TPM_DIGEST_SIZE 20 +#define TPM_ERROR_SIZE 10 +#define TPM_RET_CODE_IDX 6 + +/* tpm_capabilities */ +#define TPM_CAP_FLAG cpu_to_be32(4) +#define TPM_CAP_PROP cpu_to_be32(5) +#define CAP_VERSION_1_1 cpu_to_be32(0x06) +#define CAP_VERSION_1_2 cpu_to_be32(0x1A) + +/* tpm_sub_capabilities */ +#define TPM_CAP_PROP_PCR cpu_to_be32(0x101) +#define TPM_CAP_PROP_MANUFACTURER cpu_to_be32(0x103) +#define TPM_CAP_FLAG_PERM cpu_to_be32(0x108) +#define TPM_CAP_FLAG_VOL cpu_to_be32(0x109) +#define TPM_CAP_PROP_OWNER cpu_to_be32(0x111) +#define TPM_CAP_PROP_TIS_TIMEOUT cpu_to_be32(0x115) +#define TPM_CAP_PROP_TIS_DURATION cpu_to_be32(0x120) + + +#define TPM_INTERNAL_RESULT_SIZE 200 +#define TPM_TAG_RQU_COMMAND cpu_to_be16(193) +#define TPM_ORD_GET_CAP cpu_to_be32(101) + +extern const struct tpm_input_header tpm_getcap_header; + + + +const uint8_t tpm_protected_ordinal_duration[TPM_MAX_PROTECTED_ORDINAL] = { + TPM_UNDEFINED, /* 0 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 5 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 10 */ + TPM_SHORT, +}; + +const uint8_t tpm_ordinal_duration[TPM_MAX_ORDINAL] = { + TPM_UNDEFINED, /* 0 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 5 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 10 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_LONG, + TPM_LONG, + TPM_MEDIUM, /* 15 */ + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, + TPM_LONG, + TPM_SHORT, /* 20 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, /* 25 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 30 */ + TPM_LONG, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 35 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 40 */ + TPM_LONG, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 45 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_LONG, + TPM_MEDIUM, /* 50 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 55 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 60 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 65 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 70 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 75 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 80 */ + TPM_UNDEFINED, + TPM_MEDIUM, + TPM_LONG, + TPM_SHORT, + TPM_UNDEFINED, /* 85 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 90 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 95 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 100 */ + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 105 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 110 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 115 */ + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 120 */ + TPM_LONG, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 125 */ + 
TPM_SHORT, + TPM_LONG, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, /* 130 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_SHORT, + TPM_MEDIUM, + TPM_UNDEFINED, /* 135 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 140 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 145 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 150 */ + TPM_MEDIUM, + TPM_MEDIUM, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 155 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 160 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 165 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_LONG, /* 170 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 175 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_MEDIUM, /* 180 */ + TPM_SHORT, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, /* 185 */ + TPM_SHORT, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 190 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 195 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 200 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 205 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_MEDIUM, /* 210 */ + TPM_UNDEFINED, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_MEDIUM, + TPM_UNDEFINED, /* 215 */ + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, + TPM_SHORT, /* 220 */ + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_SHORT, + TPM_UNDEFINED, /* 225 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 230 */ + TPM_LONG, + TPM_MEDIUM, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, /* 235 */ + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_UNDEFINED, + TPM_SHORT, /* 240 */ + TPM_UNDEFINED, + TPM_MEDIUM, +}; + +const struct tpm_input_header tpm_getcap_header = { + .tag = TPM_TAG_RQU_COMMAND, + .length = cpu_to_be32(22), + .ordinal = TPM_ORD_GET_CAP +}; + + +enum tis_access { + TPM_ACCESS_VALID = 0x80, + TPM_ACCESS_ACTIVE_LOCALITY = 0x20, /* (R) */ + TPM_ACCESS_RELINQUISH_LOCALITY = 0x20,/* (W) */ + TPM_ACCESS_REQUEST_PENDING = 0x04, /* (W) */ + TPM_ACCESS_REQUEST_USE = 0x02, /* (W) */ +}; + +enum tis_status { + TPM_STS_VALID = 0x80, /* (R) */ + TPM_STS_COMMAND_READY = 0x40, /* (R) */ + TPM_STS_DATA_AVAIL = 0x10, /* (R) */ + TPM_STS_DATA_EXPECT = 0x08, /* (R) */ + TPM_STS_GO = 0x20, /* (W) */ +}; + +enum tis_int_flags { + TPM_GLOBAL_INT_ENABLE = 0x80000000, + TPM_INTF_BURST_COUNT_STATIC = 0x100, + TPM_INTF_CMD_READY_INT = 0x080, + TPM_INTF_INT_EDGE_FALLING = 0x040, + TPM_INTF_INT_EDGE_RISING = 0x020, + TPM_INTF_INT_LEVEL_LOW = 0x010, + TPM_INTF_INT_LEVEL_HIGH = 0x008, + TPM_INTF_LOCALITY_CHANGE_INT = 0x004, + TPM_INTF_STS_VALID_INT = 0x002, + TPM_INTF_DATA_AVAIL_INT = 0x001, +}; + +enum tis_defaults { + TIS_MEM_BASE = 0xFED40000, + TIS_MEM_LEN = 0x5000, + TIS_SHORT_TIMEOUT = 750, /*ms*/ + TIS_LONG_TIMEOUT = 2000, /*2 sec */ +}; + +#define TPM_TIMEOUT 5 + +#define TPM_ACCESS(t, l) (((uint8_t*)t->pages[l]) + 0x0000) +#define TPM_INT_ENABLE(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0008)) +#define TPM_INT_VECTOR(t, l) (((uint8_t*)t->pages[l]) + 0x000C) +#define TPM_INT_STATUS(t, l) (((uint8_t*)t->pages[l]) + 0x0010) +#define TPM_INTF_CAPS(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0014)) 
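The register macros in this block encode the TCG TIS MMIO layout: each locality owns one 4 KiB page starting at the base address (TIS_MEM_BASE, 0xFED40000, in enum tis_defaults above), with every register at a fixed offset inside its locality page; init_tpm_tis() below maps one such page per enabled locality into pages[l]. A minimal sketch of the same address arithmetic, for orientation only (tis_reg_addr and TIS_LOCALITY_STRIDE are hypothetical names, not part of the patch):

#define TIS_LOCALITY_STRIDE 0x1000 /* one 4 KiB locality page */

/* Physical address of a TIS register before mapping; the TPM_* macros
 * perform the same arithmetic on the ioremapped pages[l]. */
static inline unsigned long tis_reg_addr(unsigned long base, int locality,
                                         unsigned int offset)
{
    return base + (unsigned long)locality * TIS_LOCALITY_STRIDE + offset;
}

/* Example: tis_reg_addr(TIS_MEM_BASE, 0, 0x0018) == 0xFED40018, the
 * locality-0 status register that TPM_STS(t, 0) reads once mapped. */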
+#define TPM_STS(t, l) ((uint8_t*)(((uint8_t*)t->pages[l]) + 0x0018)) +#define TPM_DATA_FIFO(t, l) (((uint8_t*)t->pages[l]) + 0x0024) + +#define TPM_DID_VID(t, l) ((uint32_t*)(((uint8_t*)t->pages[l]) + 0x0F00)) +#define TPM_RID(t, l) (((uint8_t*)t->pages[l]) + 0x0F04) + +struct tpm_chip { + int enabled_localities; + int locality; + unsigned long baseaddr; + uint8_t* pages[5]; + int did, vid, rid; + + uint8_t data_buffer[TPM_BUFSIZE]; + int data_len; + + s_time_t timeout_a, timeout_b, timeout_c, timeout_d; + s_time_t duration[3]; + +#ifdef HAVE_LIBC + int fd; +#endif + + unsigned int irq; + struct wait_queue_head read_queue; + struct wait_queue_head int_queue; +}; + + +static void __init_tpm_chip(struct tpm_chip* tpm) { + tpm->enabled_localities = TPM_TIS_EN_LOCLALL; + tpm->locality = -1; + tpm->baseaddr = 0; + tpm->pages[0] = tpm->pages[1] = tpm->pages[2] = tpm->pages[3] = tpm->pages[4] = NULL; + tpm->vid = 0; + tpm->did = 0; + tpm->irq = 0; + init_waitqueue_head(&tpm->read_queue); + init_waitqueue_head(&tpm->int_queue); + + tpm->data_len = -1; + +#ifdef HAVE_LIBC + tpm->fd = -1; +#endif +} + +/* + * Returns max number of nsecs to wait + */ +s_time_t tpm_calc_ordinal_duration(struct tpm_chip *chip, + uint32_t ordinal) +{ + int duration_idx = TPM_UNDEFINED; + s_time_t duration = 0; + + if (ordinal < TPM_MAX_ORDINAL) + duration_idx = tpm_ordinal_duration[ordinal]; + else if ((ordinal & TPM_PROTECTED_ORDINAL_MASK) < + TPM_MAX_PROTECTED_ORDINAL) + duration_idx = + tpm_protected_ordinal_duration[ordinal & + TPM_PROTECTED_ORDINAL_MASK]; + + if (duration_idx != TPM_UNDEFINED) { + duration = chip->duration[duration_idx]; + } + + if (duration <= 0) { + return SECONDS(120); + } + else + { + return duration; + } +} + + +static int locality_enabled(struct tpm_chip* tpm, int l) { + return l >= 0 && tpm->enabled_localities & (1 << l); +} + +static int check_locality(struct tpm_chip* tpm, int l) { + if(locality_enabled(tpm, l) && (ioread8(TPM_ACCESS(tpm, l)) & + (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) == + (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) { + return l; + } + return -1; +} + +void release_locality(struct tpm_chip* tpm, int l, int force) +{ + if (locality_enabled(tpm, l) && (force || (ioread8(TPM_ACCESS(tpm, l)) & + (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID)) == + (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID))) { + iowrite8(TPM_ACCESS(tpm, l), TPM_ACCESS_RELINQUISH_LOCALITY); + } +} + +int tpm_tis_request_locality(struct tpm_chip* tpm, int l) { + + s_time_t stop; + /*Make sure locality is valid */ + if(!locality_enabled(tpm, l)) { + printk("tpm_tis_change_locality() Tried to change to locality %d, but it is disabled or invalid!\n", l); + return -1; + } + /* Check if we already have the current locality */ + if(check_locality(tpm, l) >= 0) { + return tpm->locality = l; + } + /* Set the new locality*/ + iowrite8(TPM_ACCESS(tpm, l), TPM_ACCESS_REQUEST_USE); + + if(tpm->irq) { + /* Wait for interrupt */ + wait_event_deadline(tpm->int_queue, (check_locality(tpm, l) >= 0), NOW() + tpm->timeout_a); + + /* FIXME: Handle timeout event, should return error in that case */ + return l; + } else { + /* Wait for burstcount */ + stop = NOW() + tpm->timeout_a; + do { + if(check_locality(tpm, l) >= 0) { + return tpm->locality = l; + } + msleep(TPM_TIMEOUT); + } while(NOW() < stop); + } + + printk("REQ LOCALITY FAILURE\n"); + return -1; +} + +static uint8_t tpm_tis_status(struct tpm_chip* tpm) { + return ioread8(TPM_STS(tpm, tpm->locality)); +} + +/* This causes the current command to be 
aborted */ +static void tpm_tis_ready(struct tpm_chip* tpm) { + iowrite8(TPM_STS(tpm, tpm->locality), TPM_STS_COMMAND_READY); +} +#define tpm_tis_cancel_cmd(v) tpm_tis_ready(v) + +static int get_burstcount(struct tpm_chip* tpm) { + s_time_t stop; + int burstcnt; + + stop = NOW() + tpm->timeout_d; + do { + burstcnt = ioread8((TPM_STS(tpm, tpm->locality) + 1)); + burstcnt += ioread8(TPM_STS(tpm, tpm->locality) + 2) << 8; + + if (burstcnt) { + return burstcnt; + } + msleep(TPM_TIMEOUT); + } while(NOW() < stop); + return -EBUSY; +} + +static int wait_for_stat(struct tpm_chip* tpm, uint8_t mask, + unsigned long timeout, struct wait_queue_head* queue) { + s_time_t stop; + uint8_t status; + + status = tpm_tis_status(tpm); + if((status & mask) == mask) { + return 0; + } + + if(tpm->irq) { + wait_event_deadline(*queue, ((tpm_tis_status(tpm) & mask) == mask), timeout); + /* FIXME: Check for timeout and return -ETIME */ + return 0; + } else { + stop = NOW() + timeout; + do { + msleep(TPM_TIMEOUT); + status = tpm_tis_status(tpm); + if((status & mask) == mask) + return 0; + } while( NOW() < stop); + } + return -ETIME; +} + +static int recv_data(struct tpm_chip* tpm, uint8_t* buf, size_t count) { + int size = 0; + int burstcnt; + while( size < count && + wait_for_stat(tpm, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + tpm->timeout_c, + &tpm->read_queue) + == 0) { + burstcnt = get_burstcount(tpm); + for(; burstcnt > 0 && size < count; --burstcnt) + { + buf[size++] = ioread8(TPM_DATA_FIFO(tpm, tpm->locality)); + } + } + return size; +} + +int tpm_tis_recv(struct tpm_chip* tpm, uint8_t* buf, size_t count) { + int size = 0; + int expected, status; + + if (count < TPM_HEADER_SIZE) { + size = -EIO; + goto out; + } + + /* read first 10 bytes, including tag, paramsize, and result */ + if((size = + recv_data(tpm, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { + printk("Error reading tpm cmd header\n"); + goto out; + } + + expected = be32_to_cpu(*((uint32_t*)(buf + 2))); + if(expected > count) { + size = -EIO; + goto out; + } + + if((size += recv_data(tpm, & buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE)) < expected) { + printk("Unable to read rest of tpm command size=%d expected=%d\n", size, expected); + size = -ETIME; + goto out; + } + + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->int_queue); + status = tpm_tis_status(tpm); + if(status & TPM_STS_DATA_AVAIL) { + printk("Error: left over data\n"); + size = -EIO; + goto out; + } + +out: + tpm_tis_ready(tpm); + release_locality(tpm, tpm->locality, 0); + return size; +} +int tpm_tis_send(struct tpm_chip* tpm, uint8_t* buf, size_t len) { + int rc; + int status, burstcnt = 0; + int count = 0; + uint32_t ordinal; + + if(tpm_tis_request_locality(tpm, tpm->locality) < 0) { + return -EBUSY; + } + + status = tpm_tis_status(tpm); + if((status & TPM_STS_COMMAND_READY) == 0) { + tpm_tis_ready(tpm); + if(wait_for_stat(tpm, TPM_STS_COMMAND_READY, tpm->timeout_b, &tpm->int_queue) < 0) { + rc = -ETIME; + goto out_err; + } + } + + while(count < len - 1) { + burstcnt = get_burstcount(tpm); + for(;burstcnt > 0 && count < len -1; --burstcnt) { + iowrite8(TPM_DATA_FIFO(tpm, tpm->locality), buf[count++]); + } + + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->int_queue); + status = tpm_tis_status(tpm); + if((status & TPM_STS_DATA_EXPECT) == 0) { + rc = -EIO; + goto out_err; + } + } + + /*Write last byte*/ + iowrite8(TPM_DATA_FIFO(tpm, tpm->locality), buf[count]); + wait_for_stat(tpm, TPM_STS_VALID, tpm->timeout_c, &tpm->read_queue); + status = tpm_tis_status(tpm); + 
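/* TIS handshake note: the TPM keeps STS.dataExpect asserted while it still
 * expects more command bytes and must deassert it once the final byte is
 * accepted; if it is still set at this point, the TPM disagrees with the
 * declared command length, so the transfer is treated as an I/O error. */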
if((status & TPM_STS_DATA_EXPECT) != 0) { + rc = -EIO; + goto out_err; + } + + /*go and do it*/ + iowrite8(TPM_STS(tpm, tpm->locality), TPM_STS_GO); + + if(tpm->irq) { + /*Wait for interrupt */ + ordinal = be32_to_cpu(*(buf + 6)); + if(wait_for_stat(tpm, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + tpm_calc_ordinal_duration(tpm, ordinal), + &tpm->read_queue) < 0) { + rc = -ETIME; + goto out_err; + } + } +#ifdef HAVE_LIBC + if(tpm->fd >= 0) { + files[tpm->fd].read = 0; + files[tpm->fd].tpm_tis.respgot = 0; + files[tpm->fd].tpm_tis.offset = 0; + } +#endif + return len; + +out_err: + tpm_tis_ready(tpm); + release_locality(tpm, tpm->locality, 0); + return rc; +} + +static void tpm_tis_irq_handler(evtchn_port_t port, struct pt_regs *regs, void* data) +{ + struct tpm_chip* tpm = data; + uint32_t interrupt; + int i; + + interrupt = ioread32(TPM_INT_STATUS(tpm, tpm->locality)); + if(interrupt == 0) { + return; + } + + if(interrupt & TPM_INTF_DATA_AVAIL_INT) { + wake_up(&tpm->read_queue); + } + if(interrupt & TPM_INTF_LOCALITY_CHANGE_INT) { + for(i = 0; i < 5; ++i) { + if(check_locality(tpm, i) >= 0) { + break; + } + } + } + if(interrupt & (TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_STS_VALID_INT | + TPM_INTF_CMD_READY_INT)) { + wake_up(&tpm->int_queue); + } + + /* Clear interrupts handled with TPM_EOI */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), interrupt); + ioread32(TPM_INT_STATUS(tpm, tpm->locality)); + return; +} + +/* + * Internal kernel interface to transmit TPM commands + */ +static ssize_t tpm_transmit(struct tpm_chip *chip, const uint8_t *buf, + size_t bufsiz) +{ + ssize_t rc; + uint32_t count, ordinal; + s_time_t stop; + + count = be32_to_cpu(*((uint32_t *) (buf + 2))); + ordinal = be32_to_cpu(*((uint32_t *) (buf + 6))); + if (count == 0) + return -ENODATA; + if (count > bufsiz) { + printk("Error: invalid count value %x %zx \n", count, bufsiz); + return -E2BIG; + } + + //down(&chip->tpm_mutex); + + if ((rc = tpm_tis_send(chip, (uint8_t *) buf, count)) < 0) { + printk("tpm_transmit: tpm_send: error %ld\n", (long) rc); + goto out; + } + + if (chip->irq) + goto out_recv; + + stop = NOW() + tpm_calc_ordinal_duration(chip, ordinal); + do { + uint8_t status = tpm_tis_status(chip); + if ((status & (TPM_STS_DATA_AVAIL | TPM_STS_VALID)) == + (TPM_STS_DATA_AVAIL | TPM_STS_VALID)) + goto out_recv; + + if ((status == TPM_STS_COMMAND_READY)) { + printk("TPM Error: Operation Canceled\n"); + rc = -ECANCELED; + goto out; + } + + msleep(TPM_TIMEOUT); /* CHECK */ + rmb(); + } while (NOW() < stop); + + /* Cancel the command */ + tpm_tis_cancel_cmd(chip); + printk("TPM Operation Timed out\n"); + rc = -ETIME; + goto out; + +out_recv: + if((rc = tpm_tis_recv(chip, (uint8_t *) buf, bufsiz)) < 0) { + printk("tpm_transmit: tpm_recv: error %d\n", (int) rc); + } +out: + //up(&chip->tpm_mutex); + return rc; +} + +static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, + int len, const char *desc) +{ + int err; + + len = tpm_transmit(chip,(uint8_t *) cmd, len); + if (len < 0) + return len; + if (len == TPM_ERROR_SIZE) { + err = be32_to_cpu(cmd->header.out.return_code); + printk("A TPM error (%d) occurred %s\n", err, desc); + return err; + } + return 0; +} + +int tpm_get_timeouts(struct tpm_chip *chip) +{ + struct tpm_cmd_t tpm_cmd; + struct timeout_t *timeout_cap; + struct duration_t *duration_cap; + ssize_t rc; + uint32_t timeout; + unsigned int scale = 1; + + tpm_cmd.header.in = tpm_getcap_header; + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = 
cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT; + + if((rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the timeouts")) != 0) { + printk("transmit failed %d\n", (int) rc); + goto duration; + } + + if(be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) != + sizeof(tpm_cmd.header.out) + sizeof(uint32_t) + 4 * sizeof(uint32_t)) { + return -EINVAL; + } + + timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; + /* Don't overwrite default if value is 0 */ + timeout = be32_to_cpu(timeout_cap->a); + if(timeout && timeout < 1000) { + /* timeouts in msc rather usec */ + scale = 1000; + } + if (timeout) + chip->timeout_a = MICROSECS(timeout * scale); /*Convert to msec */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_a,MILLISECS(TIS_SHORT_TIMEOUT),'a'); + + timeout = be32_to_cpu(timeout_cap->b); + if (timeout) + chip->timeout_b = MICROSECS(timeout * scale); /*Convert to msec */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_b,MILLISECS(TIS_LONG_TIMEOUT),'b'); + + timeout = be32_to_cpu(timeout_cap->c); + if (timeout) + chip->timeout_c = MICROSECS(timeout * scale); /*Convert to msec */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_c,MILLISECS(TIS_SHORT_TIMEOUT),'c'); + + timeout = be32_to_cpu(timeout_cap->d); + if (timeout) + chip->timeout_d = MICROSECS(timeout * scale); /*Convert to msec */ + ADJUST_TIMEOUTS_TO_STANDARD(chip->timeout_d,MILLISECS(TIS_SHORT_TIMEOUT),'d'); + +duration: + tpm_cmd.header.in = tpm_getcap_header; + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION; + + if((rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the durations")) < 0) { + return rc; + } + + if(be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) != + sizeof(tpm_cmd.header.out) + sizeof(uint32_t) + 3 * sizeof(uint32_t)) { + return -EINVAL; + } + + duration_cap = &tpm_cmd.params.getcap_out.cap.duration; + chip->duration[TPM_SHORT] = MICROSECS(be32_to_cpu(duration_cap->tpm_short)); + chip->duration[TPM_MEDIUM] = MICROSECS(be32_to_cpu(duration_cap->tpm_medium)); + chip->duration[TPM_LONG] = MICROSECS(be32_to_cpu(duration_cap->tpm_long)); + + /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above + * value wrong and apparently reports msecs rather than usecs. So we + * fix up the resulting too-small TPM_SHORT value to make things work. 
+ */ + if (chip->duration[TPM_SHORT] < MILLISECS(10)) { + chip->duration[TPM_SHORT] = SECONDS(1); + chip->duration[TPM_MEDIUM] *= 1000; + chip->duration[TPM_LONG] *= 1000; + printk("Adjusting TPM timeout parameters\n"); + } + + return 0; +} + + + +void tpm_continue_selftest(struct tpm_chip* chip) { + uint8_t data[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* length */ + 0, 0, 0, 83, /* TPM_ORD_GetCapability */ + }; + + tpm_transmit(chip, data, sizeof(data)); +} + +ssize_t tpm_getcap(struct tpm_chip *chip, uint32_t subcap_id, cap_t *cap, + const char *desc) +{ + struct tpm_cmd_t tpm_cmd; + int rc; + + tpm_cmd.header.in = tpm_getcap_header; + if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) { + tpm_cmd.params.getcap_in.cap = subcap_id; + /*subcap field not necessary */ + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0); + tpm_cmd.header.in.length -= cpu_to_be32(sizeof(uint32_t)); + } else { + if (subcap_id == TPM_CAP_FLAG_PERM || + subcap_id == TPM_CAP_FLAG_VOL) + tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG; + else + tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; + tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); + tpm_cmd.params.getcap_in.subcap = subcap_id; + } + rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc); + if (!rc) + *cap = tpm_cmd.params.getcap_out.cap; + return rc; +} + + +struct tpm_chip* init_tpm_tis(unsigned long baseaddr, int localities, unsigned int irq) +{ + int i; + unsigned long addr; + struct tpm_chip* tpm = NULL; + uint32_t didvid; + uint32_t intfcaps; + uint32_t intmask; + + printk("============= Init TPM TIS Driver ==============\n"); + + /*Sanity check the localities input */ + if(localities & ~TPM_TIS_EN_LOCLALL) { + printk("init_tpm_tis() Invalid locality specification! 
%X\n", localities); + goto abort_egress; + } + + printk("IOMEM Machine Base Address: %lX\n", baseaddr); + + /* Create the tpm data structure */ + tpm = malloc(sizeof(struct tpm_chip)); + __init_tpm_chip(tpm); + + /* Set the enabled localities - if 0 we leave default as all enabled */ + if(localities != 0) { + tpm->enabled_localities = localities; + } + printk("Enabled Localities: "); + for(i = 0; i < 5; ++i) { + if(locality_enabled(tpm, i)) { + printk("%d ", i); + } + } + printk("\n"); + + /* Set the base machine address */ + tpm->baseaddr = baseaddr; + + /* Set default timeouts */ + tpm->timeout_a = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_b = MILLISECS(TIS_LONG_TIMEOUT); + tpm->timeout_c = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_d = MILLISECS(TIS_SHORT_TIMEOUT); + + /*Map the mmio pages */ + addr = tpm->baseaddr; + for(i = 0; i < 5; ++i) { + if(locality_enabled(tpm, i)) { + /* Map the page in now */ + if((tpm->pages[i] = ioremap_nocache(addr, PAGE_SIZE)) == NULL) { + printk("Unable to map iomem page at address %lx\n", addr); + goto abort_egress; + } + + /* Set default locality to the first enabled one */ + if (tpm->locality < 0) { + if(tpm_tis_request_locality(tpm, i) < 0) { + printk("Unable to request locality %d??\n", i); + goto abort_egress; + } + } + } + addr += PAGE_SIZE; + } + + + /* Get the vendor and device ids */ + didvid = ioread32(TPM_DID_VID(tpm, tpm->locality)); + tpm->did = didvid >> 16; + tpm->vid = didvid & 0xFFFF; + + + /* Get the revision id */ + tpm->rid = ioread8(TPM_RID(tpm, tpm->locality)); + + printk("1.2 TPM (device-id=0x%X vendor-id = %X rev-id = %X)\n", tpm->did, tpm->vid, tpm->rid); + + intfcaps = ioread32(TPM_INTF_CAPS(tpm, tpm->locality)); + printk("TPM interface capabilities (0x%x):\n", intfcaps); + if (intfcaps & TPM_INTF_BURST_COUNT_STATIC) + printk("\tBurst Count Static\n"); + if (intfcaps & TPM_INTF_CMD_READY_INT) + printk("\tCommand Ready Int Support\n"); + if (intfcaps & TPM_INTF_INT_EDGE_FALLING) + printk("\tInterrupt Edge Falling\n"); + if (intfcaps & TPM_INTF_INT_EDGE_RISING) + printk("\tInterrupt Edge Rising\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_LOW) + printk("\tInterrupt Level Low\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_HIGH) + printk("\tInterrupt Level High\n"); + if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT) + printk("\tLocality Change Int Support\n"); + if (intfcaps & TPM_INTF_STS_VALID_INT) + printk("\tSts Valid Int Support\n"); + if (intfcaps & TPM_INTF_DATA_AVAIL_INT) + printk("\tData Avail Int Support\n"); + + /*Interrupt setup */ + intmask = ioread32(TPM_INT_ENABLE(tpm, tpm->locality)); + + intmask |= TPM_INTF_CMD_READY_INT + | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT + | TPM_INTF_STS_VALID_INT; + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask); + + /*If interrupts are enabled, handle it */ + if(irq) { + if(irq != TPM_PROBE_IRQ) { + tpm->irq = irq; + } else { + /*FIXME add irq probing feature later */ + printk("IRQ probing not implemented\n"); + } + } + + if(tpm->irq) { + iowrite8(TPM_INT_VECTOR(tpm, tpm->locality), tpm->irq); + + if(bind_pirq(tpm->irq, 1, tpm_tis_irq_handler, tpm) != 0) { + printk("Unable to request irq: %u for use\n", tpm->irq); + printk("Will use polling mode\n"); + tpm->irq = 0; + } else { + /* Clear all existing */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), ioread32(TPM_INT_STATUS(tpm, tpm->locality))); + + /* Turn on interrupts */ + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask | TPM_GLOBAL_INT_ENABLE); + } + } + + if(tpm_get_timeouts(tpm)) { + printk("Could not get TPM 
timeouts and durations\n"); + goto abort_egress; + } + tpm_continue_selftest(tpm); + + + return tpm; +abort_egress: + if(tpm != NULL) { + shutdown_tpm_tis(tpm); + } + return NULL; +} + +void shutdown_tpm_tis(struct tpm_chip* tpm){ + int i; + + printk("Shutting down tpm_tis device\n"); + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), ~TPM_GLOBAL_INT_ENABLE); + + /*Unmap all of the mmio pages */ + for(i = 0; i < 5; ++i) { + if(tpm->pages[i] != NULL) { + iounmap(tpm->pages[i], PAGE_SIZE); + tpm->pages[i] = NULL; + } + } + free(tpm); + return; +} + + +int tpm_tis_cmd(struct tpm_chip* tpm, uint8_t* req, size_t reqlen, uint8_t** resp, size_t* resplen) +{ + if(tpm->locality < 0) { + printk("tpm_tis_cmd() failed! locality not set!\n"); + return -1; + } + if(reqlen > TPM_BUFSIZE) { + reqlen = TPM_BUFSIZE; + } + memcpy(tpm->data_buffer, req, reqlen); + *resplen = tpm_transmit(tpm, tpm->data_buffer, TPM_BUFSIZE); + + *resp = malloc(*resplen); + memcpy(*resp, tpm->data_buffer, *resplen); + return 0; +} + +#ifdef HAVE_LIBC +int tpm_tis_open(struct tpm_chip* tpm) +{ + /* Silently prevent multiple opens */ + if(tpm->fd != -1) { + return tpm->fd; + } + + tpm->fd = alloc_fd(FTYPE_TPM_TIS); + printk("tpm_tis_open() -> %d\n", tpm->fd); + files[tpm->fd].tpm_tis.dev = tpm; + files[tpm->fd].tpm_tis.offset = 0; + files[tpm->fd].tpm_tis.respgot = 0; + return tpm->fd; +} + +int tpm_tis_posix_write(int fd, const uint8_t* buf, size_t count) +{ + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + if(tpm->locality < 0) { + printk("tpm_tis_posix_write() failed! locality not set!\n"); + errno = EINPROGRESS; + return -1; + } + if(count == 0) { + return 0; + } + + /* Return an error if we are already processing a command */ + if(count > TPM_BUFSIZE) { + count = TPM_BUFSIZE; + } + /* Send the command now */ + memcpy(tpm->data_buffer, buf, count); + if((tpm->data_len = tpm_transmit(tpm, tpm->data_buffer, TPM_BUFSIZE)) < 0) { + errno = EIO; + return -1; + } + return count; +} + +int tpm_tis_posix_read(int fd, uint8_t* buf, size_t count) +{ + int rc; + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + if(count == 0) { + return 0; + } + + /* If there is no tpm resp to read, return EIO */ + if(tpm->data_len < 0) { + errno = EIO; + return -1; + } + + + /* Handle EOF case */ + if(files[fd].tpm_tis.offset >= tpm->data_len) { + rc = 0; + } else { + rc = min(tpm->data_len - files[fd].tpm_tis.offset, count); + memcpy(buf, tpm->data_buffer + files[fd].tpm_tis.offset, rc); + } + files[fd].tpm_tis.offset += rc; + /* Reset the data pending flag */ + return rc; +} +int tpm_tis_posix_fstat(int fd, struct stat* buf) +{ + struct tpm_chip* tpm; + tpm = files[fd].tpm_tis.dev; + + buf->st_mode = O_RDWR; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = be32_to_cpu(*((uint32_t*)(tpm->data_buffer + 2))); + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + return 0; +} + +/* TPM 2.0 */ + +/*TPM2.0 Selftest*/ +static void tpm2_selftest(struct tpm_chip* chip) +{ + uint8_t data[] = { + 0x80, 0x1, + 0x0, 0x0, 0x0, 0xb, + 0x0, 0x0, 0x1, 0x43, + 0x1, + }; + + tpm_transmit(chip, data, sizeof(data)); +} + +struct tpm_chip* init_tpm2_tis(unsigned long baseaddr, int localities, unsigned int irq) +{ + int i; + unsigned long addr; + struct tpm_chip* tpm = NULL; + uint32_t didvid; + uint32_t intfcaps; + uint32_t intmask; + + printk("============= Init TPM2 TIS Driver ==============\n"); + + /*Sanity check the localities input */ + if (localities & ~TPM_TIS_EN_LOCLALL) { + printk("init_tpm2_tis Invalid locality specification! 
%X\n", localities); + goto abort_egress; + } + + printk("IOMEM Machine Base Address: %lX\n", baseaddr); + + /* Create the tpm data structure */ + tpm = malloc(sizeof(struct tpm_chip)); + __init_tpm_chip(tpm); + + /* Set the enabled localities - if 0 we leave default as all enabled */ + if (localities != 0) { + tpm->enabled_localities = localities; + } + printk("Enabled Localities: "); + for (i = 0; i < 5; ++i) { + if (locality_enabled(tpm, i)) { + printk("%d ", i); + } + } + printk("\n"); + + /* Set the base machine address */ + tpm->baseaddr = baseaddr; + + /* Set default timeouts */ + tpm->timeout_a = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_b = MILLISECS(TIS_LONG_TIMEOUT); + tpm->timeout_c = MILLISECS(TIS_SHORT_TIMEOUT); + tpm->timeout_d = MILLISECS(TIS_SHORT_TIMEOUT); + + /*Map the mmio pages */ + addr = tpm->baseaddr; + for (i = 0; i < 5; ++i) { + if (locality_enabled(tpm, i)) { + + /* Map the page in now */ + if ((tpm->pages[i] = ioremap_nocache(addr, PAGE_SIZE)) == NULL) { + printk("Unable to map iomem page a address %lx\n", addr); + goto abort_egress; + } + + /* Set default locality to the first enabled one */ + if (tpm->locality < 0) { + if (tpm_tis_request_locality(tpm, i) < 0) { + printk("Unable to request locality %d??\n", i); + goto abort_egress; + } + } + } + addr += PAGE_SIZE; + } + + /* Get the vendor and device ids */ + didvid = ioread32(TPM_DID_VID(tpm, tpm->locality)); + tpm->did = didvid >> 16; + tpm->vid = didvid & 0xFFFF; + + /* Get the revision id */ + tpm->rid = ioread8(TPM_RID(tpm, tpm->locality)); + printk("2.0 TPM (device-id=0x%X vendor-id = %X rev-id = %X)\n", + tpm->did, tpm->vid, tpm->rid); + + intfcaps = ioread32(TPM_INTF_CAPS(tpm, tpm->locality)); + printk("TPM interface capabilities (0x%x):\n", intfcaps); + if (intfcaps & TPM_INTF_BURST_COUNT_STATIC) + printk("\tBurst Count Static\n"); + if (intfcaps & TPM_INTF_CMD_READY_INT) + printk("\tCommand Ready Int Support\n"); + if (intfcaps & TPM_INTF_INT_EDGE_FALLING) + printk("\tInterrupt Edge Falling\n"); + if (intfcaps & TPM_INTF_INT_EDGE_RISING) + printk("\tInterrupt Edge Rising\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_LOW) + printk("\tInterrupt Level Low\n"); + if (intfcaps & TPM_INTF_INT_LEVEL_HIGH) + printk("\tInterrupt Level High\n"); + if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT) + printk("\tLocality Change Int Support\n"); + if (intfcaps & TPM_INTF_STS_VALID_INT) + printk("\tSts Valid Int Support\n"); + if (intfcaps & TPM_INTF_DATA_AVAIL_INT) + printk("\tData Avail Int Support\n"); + + /*Interupt setup */ + intmask = ioread32(TPM_INT_ENABLE(tpm, tpm->locality)); + + intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT | + TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT; + + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), intmask); + + /*If interupts are enabled, handle it */ + if (irq) { + if (irq != TPM_PROBE_IRQ) { + tpm->irq = irq; + } else { + /*FIXME add irq probing feature later */ + printk("IRQ probing not implemented\n"); + } + } + + if (tpm->irq) { + iowrite8(TPM_INT_VECTOR(tpm, tpm->locality), tpm->irq); + if (bind_pirq(tpm->irq, 1, tpm_tis_irq_handler, tpm) != 0) { + printk("Unabled to request irq: %u for use\n", tpm->irq); + printk("Will use polling mode\n"); + tpm->irq = 0; + } else { + + /* Clear all existing */ + iowrite32(TPM_INT_STATUS(tpm, tpm->locality), + ioread32(TPM_INT_STATUS(tpm, tpm->locality))); + + /* Turn on interrupts */ + iowrite32(TPM_INT_ENABLE(tpm, tpm->locality), + intmask | TPM_GLOBAL_INT_ENABLE); + } + } + + tpm2_selftest(tpm); + return tpm; + 
+abort_egress:
+ if (tpm != NULL) {
+ shutdown_tpm_tis(tpm);
+ }
+ return NULL;
+}
+#endif
diff -Nru xen-4.6.0/extras/mini-os/xenbus/xenbus.c xen-4.6.5/extras/mini-os/xenbus/xenbus.c
--- xen-4.6.0/extras/mini-os/xenbus/xenbus.c 1970-01-01 00:00:00.000000000 +0000
+++ xen-4.6.5/extras/mini-os/xenbus/xenbus.c 2016-05-17 15:32:08.000000000 +0000
@@ -0,0 +1,875 @@
+/*
+ ****************************************************************************
+ * (C) 2006 - Cambridge University
+ ****************************************************************************
+ *
+ * File: xenbus.c
+ * Author: Steven Smith (sos22@cam.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ * Changes: John D. Ramsdell
+ *
+ * Date: Jun 2006, changes Aug 2005
+ *
+ * Environment: Xen Minimal OS
+ * Description: Minimal implementation of xenbus
+ *
+ ****************************************************************************
+ **/
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define min(x,y) ({ \
+ typeof(x) tmpx = (x); \
+ typeof(y) tmpy = (y); \
+ tmpx < tmpy ? tmpx : tmpy; \
+ })
+
+#ifdef XENBUS_DEBUG
+#define DEBUG(_f, _a...) \
+ printk("MINI_OS(file=xenbus.c, line=%d) " _f , __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...) ((void)0)
+#endif
+
+static struct xenstore_domain_interface *xenstore_buf;
+static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+DECLARE_WAIT_QUEUE_HEAD(xenbus_watch_queue);
+
+xenbus_event_queue xenbus_events;
+static struct watch {
+ char *token;
+ xenbus_event_queue *events;
+ struct watch *next;
+} *watches;
+struct xenbus_req_info
+{
+ int in_use:1;
+ struct wait_queue_head waitq;
+ void *reply;
+};
+
+#define NR_REQS 32
+static struct xenbus_req_info req_info[NR_REQS];
+
+static void memcpy_from_ring(const void *Ring,
+ void *Dest,
+ int off,
+ int len)
+{
+ int c1, c2;
+ const char *ring = Ring;
+ char *dest = Dest;
+ c1 = min(len, XENSTORE_RING_SIZE - off);
+ c2 = len - c1;
+ memcpy(dest, ring + off, c1);
+ memcpy(dest + c1, ring, c2);
+}
+
+char **xenbus_wait_for_watch_return(xenbus_event_queue *queue)
+{
+ struct xenbus_event *event;
+ DEFINE_WAIT(w);
+ if (!queue)
+ queue = &xenbus_events;
+ while (!(event = *queue)) {
+ add_waiter(w, xenbus_watch_queue);
+ schedule();
+ }
+ remove_waiter(w, xenbus_watch_queue);
+ *queue = event->next;
+ return &event->path;
+}
+
+void xenbus_wait_for_watch(xenbus_event_queue *queue)
+{
+ char **ret;
+ if (!queue)
+ queue = &xenbus_events;
+ ret = xenbus_wait_for_watch_return(queue);
+ if (ret)
+ free(ret);
+ else
+ printk("unexpected path returned by watch\n");
+}
+
+char* xenbus_wait_for_value(const char* path, const char* value, xenbus_event_queue *queue)
+{
+ if (!queue)
+ queue = &xenbus_events;
+ for(;;)
+ {
+ char *res, *msg;
+ int r;
+
+ msg = xenbus_read(XBT_NIL, path, &res);
+ if(msg) return msg;
+
+ r = strcmp(value,res);
+ free(res);
+
+ if(r==0) break;
+ else xenbus_wait_for_watch(queue);
+ }
+ return NULL;
+}
+
+char *xenbus_switch_state(xenbus_transaction_t xbt, const char* path, XenbusState state)
+{
+ char *current_state;
+ char *msg = NULL;
+ char *msg2 = NULL;
+ char value[2];
+ XenbusState rs;
+ int xbt_flag = 0;
+ int retry = 0;
+
+ do {
+ if (xbt == XBT_NIL) {
+ msg = xenbus_transaction_start(&xbt);
+ if (msg) goto exit;
+ xbt_flag = 1;
+ }
+
+ msg = xenbus_read(xbt, path, &current_state);
+ if (msg) goto exit;
+
+ rs = (XenbusState) (current_state[0] - '0');
+ free(current_state);
+ if (rs == state) {
+ msg = NULL;
+ goto exit;
+ }
+
+ snprintf(value, 2, "%d", state);
+ msg = xenbus_write(xbt, path, value);
+
+exit:
+ if (xbt_flag) {
+ msg2 = xenbus_transaction_end(xbt, 0, &retry);
+ xbt = XBT_NIL;
+ }
+ if (msg == NULL && msg2 != NULL)
+ msg = msg2;
+ } while (retry);
+
+ return msg;
+}
+
+char *xenbus_wait_for_state_change(const char* path, XenbusState *state, xenbus_event_queue *queue)
+{
+ if (!queue)
+ queue = &xenbus_events;
+ for(;;)
+ {
+ char *res, *msg;
+ XenbusState rs;
+
+ msg = xenbus_read(XBT_NIL, path, &res);
+ if(msg) return msg;
+
+ rs = (XenbusState) (res[0] - '0');
+ free(res);
+
+ if (rs == *state)
+ xenbus_wait_for_watch(queue);
+ else {
+ *state = rs;
+ break;
+ }
+ }
+ return NULL;
+}
+
+
+static void xenbus_thread_func(void *ign)
+{
+ struct xsd_sockmsg msg;
+ unsigned prod = xenstore_buf->rsp_prod;
+
+ for (;;)
+ {
+ wait_event(xb_waitq, prod != xenstore_buf->rsp_prod);
+ while (1)
+ {
+ prod = xenstore_buf->rsp_prod;
+ DEBUG("Rsp_cons %d, rsp_prod %d.\n", xenstore_buf->rsp_cons,
+ xenstore_buf->rsp_prod);
+ if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg))
+ break;
+ rmb();
+ memcpy_from_ring(xenstore_buf->rsp,
+ &msg,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
+ sizeof(msg));
+ DEBUG("Msg len %d, %d avail, id %d.\n",
+ msg.len + sizeof(msg),
+ xenstore_buf->rsp_prod - xenstore_buf->rsp_cons,
+ msg.req_id);
+ if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons <
+ sizeof(msg) + msg.len)
+ break;
+
+ DEBUG("Message is good.\n");
+
+ if(msg.type == XS_WATCH_EVENT)
+ {
+ struct xenbus_event *event = malloc(sizeof(*event) + msg.len);
+ xenbus_event_queue *events = NULL;
+ char *data = (char*)event + sizeof(*event);
+ struct watch *watch;
+
+ memcpy_from_ring(xenstore_buf->rsp,
+ data,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons + sizeof(msg)),
+ msg.len);
+
+ event->path = data;
+ event->token = event->path + strlen(event->path) + 1;
+
+ mb();
+ xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+
+ for (watch = watches; watch; watch = watch->next)
+ if (!strcmp(watch->token, event->token)) {
+ events = watch->events;
+ break;
+ }
+
+ if (events) {
+ event->next = *events;
+ *events = event;
+ wake_up(&xenbus_watch_queue);
+ } else {
+ printk("unexpected watch token %s\n", event->token);
+ free(event);
+ }
+ }
+
+ else
+ {
+ req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len);
+ memcpy_from_ring(xenstore_buf->rsp,
+ req_info[msg.req_id].reply,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
+ msg.len + sizeof(msg));
+ mb();
+ xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+ wake_up(&req_info[msg.req_id].waitq);
+ }
+
+ wmb();
+ notify_remote_via_evtchn(start_info.store_evtchn);
+ }
+ }
+}
+
+static void xenbus_evtchn_handler(evtchn_port_t port, struct pt_regs *regs,
+ void *ign)
+{
+ wake_up(&xb_waitq);
+}
+
+static int nr_live_reqs;
+static DEFINE_SPINLOCK(req_lock);
+static DECLARE_WAIT_QUEUE_HEAD(req_wq);
+
+/* Release a xenbus identifier */
+static void release_xenbus_id(int id)
+{
+ BUG_ON(!req_info[id].in_use);
+ spin_lock(&req_lock);
+ req_info[id].in_use = 0;
+ nr_live_reqs--;
+ if (nr_live_reqs == NR_REQS - 1)
+ wake_up(&req_wq);
+ spin_unlock(&req_lock);
+}
+
+/* Allocate an identifier for a xenbus request. Blocks if none are
+ available. */
+static int allocate_xenbus_id(void)
+{
+ static int probe;
+ int o_probe;
+
+ while (1)
+ {
+ spin_lock(&req_lock);
+ if (nr_live_reqs < NR_REQS)
+ break;
+ spin_unlock(&req_lock);
+ wait_event(req_wq, (nr_live_reqs < NR_REQS));
+ }
+
+ o_probe = probe;
+ for (;;)
+ {
+ if (!req_info[o_probe].in_use)
+ break;
+ o_probe = (o_probe + 1) % NR_REQS;
+ BUG_ON(o_probe == probe);
+ }
+ nr_live_reqs++;
+ req_info[o_probe].in_use = 1;
+ probe = (o_probe + 1) % NR_REQS;
+ spin_unlock(&req_lock);
+ init_waitqueue_head(&req_info[o_probe].waitq);
+
+ return o_probe;
+}
+
+/* Initialise xenbus. */
+void init_xenbus(void)
+{
+ int err;
+ DEBUG("init_xenbus called.\n");
+ xenstore_buf = mfn_to_virt(start_info.store_mfn);
+ create_thread("xenstore", xenbus_thread_func, NULL);
+ DEBUG("buf at %p.\n", xenstore_buf);
+ err = bind_evtchn(start_info.store_evtchn,
+ xenbus_evtchn_handler,
+ NULL);
+ unmask_evtchn(start_info.store_evtchn);
+ printk("xenbus initialised on irq %d mfn %#llx\n",
+ err, (unsigned long long) start_info.store_mfn);
+}
+
+void fini_xenbus(void)
+{
+}
+
+/* Send data to xenbus. This can block. All of the requests are seen
+ by xenbus as if sent atomically. The header is added
+ automatically, using type %type, req_id %req_id, and trans_id
+ %trans_id. */
+static void xb_write(int type, int req_id, xenbus_transaction_t trans_id,
+ const struct write_req *req, int nr_reqs)
+{
+ XENSTORE_RING_IDX prod;
+ int r;
+ int len = 0;
+ const struct write_req *cur_req;
+ int req_off;
+ int total_off;
+ int this_chunk;
+ struct xsd_sockmsg m = {.type = type, .req_id = req_id,
+ .tx_id = trans_id };
+ struct write_req header_req = { &m, sizeof(m) };
+
+ for (r = 0; r < nr_reqs; r++)
+ len += req[r].len;
+ m.len = len;
+ len += sizeof(m);
+
+ cur_req = &header_req;
+
+ BUG_ON(len > XENSTORE_RING_SIZE);
+ /* Wait for the ring to drain to the point where we can send the
+ message. */
+ prod = xenstore_buf->req_prod;
+ if (prod + len - xenstore_buf->req_cons > XENSTORE_RING_SIZE)
+ {
+ /* Wait for there to be space on the ring */
+ DEBUG("prod %d, len %d, cons %d, size %d; waiting.\n",
+ prod, len, xenstore_buf->req_cons, XENSTORE_RING_SIZE);
+ wait_event(xb_waitq,
+ xenstore_buf->req_prod + len - xenstore_buf->req_cons <=
+ XENSTORE_RING_SIZE);
+ DEBUG("Back from wait.\n");
+ prod = xenstore_buf->req_prod;
+ }
+
+ /* We're now guaranteed to be able to send the message without
+ overflowing the ring. Do so. */
+ total_off = 0;
+ req_off = 0;
+ while (total_off < len)
+ {
+ this_chunk = min(cur_req->len - req_off,
+ XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod));
+ memcpy((char *)xenstore_buf->req + MASK_XENSTORE_IDX(prod),
+ (char *)cur_req->data + req_off, this_chunk);
+ prod += this_chunk;
+ req_off += this_chunk;
+ total_off += this_chunk;
+ if (req_off == cur_req->len)
+ {
+ req_off = 0;
+ if (cur_req == &header_req)
+ cur_req = req;
+ else
+ cur_req++;
+ }
+ }
+
+ DEBUG("Complete main loop of xb_write.\n");
+ BUG_ON(req_off != 0);
+ BUG_ON(total_off != len);
+ BUG_ON(prod > xenstore_buf->req_cons + XENSTORE_RING_SIZE);
+
+ /* Remote must see entire message before updating indexes */
+ wmb();
+
+ xenstore_buf->req_prod += len;
+
+ /* Send evtchn to notify remote */
+ notify_remote_via_evtchn(start_info.store_evtchn);
+}
+
+/* Send a message to xenbus, in the same fashion as xb_write, and
+ block waiting for a reply. The reply is malloced and should be
+ freed by the caller. 
*/ +struct xsd_sockmsg * +xenbus_msg_reply(int type, + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs) +{ + int id; + DEFINE_WAIT(w); + struct xsd_sockmsg *rep; + + id = allocate_xenbus_id(); + add_waiter(w, req_info[id].waitq); + + xb_write(type, id, trans, io, nr_reqs); + + schedule(); + remove_waiter(w, req_info[id].waitq); + wake(current); + + rep = req_info[id].reply; + BUG_ON(rep->req_id != id); + release_xenbus_id(id); + return rep; +} + +static char *errmsg(struct xsd_sockmsg *rep) +{ + char *res; + if (!rep) { + char msg[] = "No reply"; + size_t len = strlen(msg) + 1; + return memcpy(malloc(len), msg, len); + } + if (rep->type != XS_ERROR) + return NULL; + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + return res; +} + +/* Send a debug message to xenbus. Can block. */ +static void xenbus_debug_msg(const char *msg) +{ + int len = strlen(msg); + struct write_req req[] = { + { "print", sizeof("print") }, + { msg, len }, + { "", 1 }}; + struct xsd_sockmsg *reply; + + reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req)); + printk("Got a reply, type %d, id %d, len %d.\n", + reply->type, reply->req_id, reply->len); +} + +/* List the contents of a directory. Returns a malloc()ed array of + pointers to malloc()ed strings. The array is NULL terminated. May + block. */ +char *xenbus_ls(xenbus_transaction_t xbt, const char *pre, char ***contents) +{ + struct xsd_sockmsg *reply, *repmsg; + struct write_req req[] = { { pre, strlen(pre)+1 } }; + int nr_elems, x, i; + char **res, *msg; + + repmsg = xenbus_msg_reply(XS_DIRECTORY, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(repmsg); + if (msg) { + *contents = NULL; + return msg; + } + reply = repmsg + 1; + for (x = nr_elems = 0; x < repmsg->len; x++) + nr_elems += (((char *)reply)[x] == 0); + res = malloc(sizeof(res[0]) * (nr_elems + 1)); + for (x = i = 0; i < nr_elems; i++) { + int l = strlen((char *)reply + x); + res[i] = malloc(l + 1); + memcpy(res[i], (char *)reply + x, l + 1); + x += l + 1; + } + res[i] = NULL; + free(repmsg); + *contents = res; + return NULL; +} + +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + rep = xenbus_msg_reply(XS_READ, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value) +{ + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, strlen(value)}, + }; + struct xsd_sockmsg *rep; + char *msg; + rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) return msg; + free(rep); + return NULL; +} + +char* xenbus_watch_path_token( xenbus_transaction_t xbt, const char *path, const char *token, xenbus_event_queue *events) +{ + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {token, strlen(token) + 1}, + }; + + struct watch *watch = malloc(sizeof(*watch)); + + char *msg; + + if (!events) + events = &xenbus_events; + + watch->token = strdup(token); + watch->events = events; + watch->next = watches; + watches = watch; + + rep = xenbus_msg_reply(XS_WATCH, xbt, req, ARRAY_SIZE(req)); + + msg = errmsg(rep); + if (msg) return msg; + free(rep); + + return NULL; +} + +char* 
xenbus_unwatch_path_token( xenbus_transaction_t xbt, const char *path, const char *token) +{ + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {token, strlen(token) + 1}, + }; + + struct watch *watch, **prev; + + char *msg; + + rep = xenbus_msg_reply(XS_UNWATCH, xbt, req, ARRAY_SIZE(req)); + + msg = errmsg(rep); + if (msg) return msg; + free(rep); + + for (prev = &watches, watch = *prev; watch; prev = &watch->next, watch = *prev) + if (!strcmp(watch->token, token)) { + free(watch->token); + *prev = watch->next; + free(watch); + break; + } + + return NULL; +} + +char *xenbus_rm(xenbus_transaction_t xbt, const char *path) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *msg; + rep = xenbus_msg_reply(XS_RM, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + rep = xenbus_msg_reply(XS_GET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +#define PERM_MAX_SIZE 32 +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm) +{ + char value[PERM_MAX_SIZE]; + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, 0}, + }; + struct xsd_sockmsg *rep; + char *msg; + snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom); + req[1].len = strlen(value) + 1; + rep = xenbus_msg_reply(XS_SET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_transaction_start(xenbus_transaction_t *xbt) +{ + /* xenstored becomes angry if you send a length 0 message, so just + shove a nul terminator on the end */ + struct write_req req = { "", 1}; + struct xsd_sockmsg *rep; + char *err; + + rep = xenbus_msg_reply(XS_TRANSACTION_START, 0, &req, 1); + err = errmsg(rep); + if (err) + return err; + sscanf((char *)(rep + 1), "%lu", xbt); + free(rep); + return NULL; +} + +char * +xenbus_transaction_end(xenbus_transaction_t t, int abort, int *retry) +{ + struct xsd_sockmsg *rep; + struct write_req req; + char *err; + + *retry = 0; + + req.data = abort ? 
"F" : "T"; + req.len = 2; + rep = xenbus_msg_reply(XS_TRANSACTION_END, t, &req, 1); + err = errmsg(rep); + if (err) { + if (!strcmp(err, "EAGAIN")) { + *retry = 1; + free(err); + return NULL; + } else { + return err; + } + } + free(rep); + return NULL; +} + +int xenbus_read_integer(const char *path) +{ + char *res, *buf; + int t; + + res = xenbus_read(XBT_NIL, path, &buf); + if (res) { + printk("Failed to read %s.\n", path); + free(res); + return -1; + } + sscanf(buf, "%d", &t); + free(buf); + return t; +} + +int xenbus_read_uuid(const char* path, unsigned char uuid[16]) { + char * res, *buf; + res = xenbus_read(XBT_NIL, path, &buf); + if(res) { + printk("Failed to read %s.\n", path); + free(res); + return 0; + } + if(strlen(buf) != ((2*16)+4) /* 16 hex bytes and 4 hyphens */ + || sscanf(buf, + "%2hhx%2hhx%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", + uuid, uuid + 1, uuid + 2, uuid + 3, + uuid + 4, uuid + 5, uuid + 6, uuid + 7, + uuid + 8, uuid + 9, uuid + 10, uuid + 11, + uuid + 12, uuid + 13, uuid + 14, uuid + 15) != 16) { + printk("Xenbus path %s value %s is not a uuid!\n", path, buf); + free(buf); + return 0; + } + free(buf); + return 1; +} + +char* xenbus_printf(xenbus_transaction_t xbt, + const char* node, const char* path, + const char* fmt, ...) +{ +#define BUFFER_SIZE 256 + char fullpath[BUFFER_SIZE]; + char val[BUFFER_SIZE]; + va_list args; + + BUG_ON(strlen(node) + strlen(path) + 1 >= BUFFER_SIZE); + sprintf(fullpath,"%s/%s", node, path); + va_start(args, fmt); + vsprintf(val, fmt, args); + va_end(args); + return xenbus_write(xbt,fullpath,val); +} + +domid_t xenbus_get_self_id(void) +{ + char *dom_id; + domid_t ret; + + BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id)); + sscanf(dom_id, "%"SCNd16, &ret); + + return ret; +} + +static void do_ls_test(const char *pre) +{ + char **dirs, *msg; + int x; + + printk("ls %s...\n", pre); + msg = xenbus_ls(XBT_NIL, pre, &dirs); + if (msg) { + printk("Error in xenbus ls: %s\n", msg); + free(msg); + return; + } + for (x = 0; dirs[x]; x++) + { + printk("ls %s[%d] -> %s\n", pre, x, dirs[x]); + free(dirs[x]); + } + free(dirs); +} + +static void do_read_test(const char *path) +{ + char *res, *msg; + printk("Read %s...\n", path); + msg = xenbus_read(XBT_NIL, path, &res); + if (msg) { + printk("Error in xenbus read: %s\n", msg); + free(msg); + return; + } + printk("Read %s -> %s.\n", path, res); + free(res); +} + +static void do_write_test(const char *path, const char *val) +{ + char *msg; + printk("Write %s to %s...\n", val, path); + msg = xenbus_write(XBT_NIL, path, val); + if (msg) { + printk("Result %s\n", msg); + free(msg); + } else { + printk("Success.\n"); + } +} + +static void do_rm_test(const char *path) +{ + char *msg; + printk("rm %s...\n", path); + msg = xenbus_rm(XBT_NIL, path); + if (msg) { + printk("Result %s\n", msg); + free(msg); + } else { + printk("Success.\n"); + } +} + +/* Simple testing thing */ +void test_xenbus(void) +{ + printk("Doing xenbus test.\n"); + xenbus_debug_msg("Testing xenbus...\n"); + + printk("Doing ls test.\n"); + do_ls_test("device"); + do_ls_test("device/vif"); + do_ls_test("device/vif/0"); + + printk("Doing read test.\n"); + do_read_test("device/vif/0/mac"); + do_read_test("device/vif/0/backend"); + + printk("Doing write test.\n"); + do_write_test("device/vif/0/flibble", "flobble"); + do_read_test("device/vif/0/flibble"); + do_write_test("device/vif/0/flibble", "widget"); + do_read_test("device/vif/0/flibble"); + + printk("Doing rm test.\n"); + 
do_rm_test("device/vif/0/flibble"); + do_read_test("device/vif/0/flibble"); + printk("(Should have said ENOENT)\n"); +} + +/* + * Local variables: + * mode: C + * c-basic-offset: 4 + * End: + */ diff -Nru xen-4.6.0/.gitignore xen-4.6.5/.gitignore --- xen-4.6.0/.gitignore 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/.gitignore 2017-03-07 16:19:05.000000000 +0000 @@ -199,6 +199,7 @@ tools/xenmon/xenbaked tools/xenpaging/xenpaging tools/xenpmd/xenpmd +tools/xenstat/libxenstat/src/_paths.h tools/xenstat/xentop/xentop tools/xenstore/init-xenstore-domain tools/xenstore/xenstore diff -Nru xen-4.6.0/m4/systemd.m4 xen-4.6.5/m4/systemd.m4 --- xen-4.6.0/m4/systemd.m4 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/m4/systemd.m4 2017-03-07 16:19:05.000000000 +0000 @@ -41,7 +41,9 @@ ]) AC_DEFUN([AX_CHECK_SYSTEMD_LIBS], [ - PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon]) + PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon],, + [PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209])] + ) dnl pkg-config older than 0.24 does not set these for dnl PKG_CHECK_MODULES() worth also noting is that as of version 208 dnl of systemd pkg-config --cflags currently yields no extra flags yet. @@ -94,8 +96,10 @@ ]) AC_DEFUN([AX_CHECK_SYSTEMD_ENABLE_AVAILABLE], [ - PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon], [systemd="y"], - [systemd="n"]) + PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon], [systemd="y"],[ + PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209], + [systemd="y"],[systemd="n"]) + ]) ]) dnl Enables systemd by default and requires a --disable-systemd option flag diff -Nru xen-4.6.0/README xen-4.6.5/README --- xen-4.6.0/README 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/README 2017-03-07 16:19:05.000000000 +0000 @@ -1,10 +1,10 @@ ################################# -__ __ _ _ __ ___ -\ \/ /___ _ __ | || | / /_ / _ \ - \ // _ \ '_ \ | || |_| '_ \| | | | - / \ __/ | | | |__ _| (_) | |_| | -/_/\_\___|_| |_| |_|(_)___(_)___/ - +__ __ _ _ __ +\ \/ /___ _ __ | || | / /_ + \ // _ \ '_ \ | || |_| '_ \ + / \ __/ | | | |__ _| (_) | +/_/\_\___|_| |_| |_|(_)___/ + ################################# http://www.xen.org/ diff -Nru xen-4.6.0/tools/configure xen-4.6.5/tools/configure --- xen-4.6.0/tools/configure 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/configure 2017-03-07 16:19:05.000000000 +0000 @@ -9175,6 +9175,66 @@ # Put the nasty error message in config.log where it belongs echo "$SYSTEMD_PKG_ERRORS" >&5 + + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +$as_echo_n "checking for SYSTEMD... " >&6; } + +if test -n "$SYSTEMD_CFLAGS"; then + pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$SYSTEMD_LIBS"; then + pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + else + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$SYSTEMD_PKG_ERRORS" >&5 + systemd="n" elif test $pkg_failed = untried; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 @@ -9188,6 +9248,90 @@ systemd="y" fi +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +$as_echo_n "checking for SYSTEMD... " >&6; } + +if test -n "$SYSTEMD_CFLAGS"; then + pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$SYSTEMD_LIBS"; then + pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + else + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$SYSTEMD_PKG_ERRORS" >&5 + + systemd="n" +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + systemd="n" +else + SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS + SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + systemd="y" +fi + +else + SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS + SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + systemd="y" +fi + if test "x$enable_systemd" != "xno"; then : @@ -9257,7 +9401,161 @@ # Put the nasty error message in config.log where it belongs echo "$SYSTEMD_PKG_ERRORS" >&5 - as_fn_error $? "Package requirements (libsystemd-daemon) were not met: + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +$as_echo_n "checking for SYSTEMD... " >&6; } + +if test -n "$SYSTEMD_CFLAGS"; then + pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$SYSTEMD_LIBS"; then + pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + else + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$SYSTEMD_PKG_ERRORS" >&5 + + as_fn_error $? 
"Package requirements (libsystemd >= 209) were not met: + +$SYSTEMD_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables SYSTEMD_CFLAGS +and SYSTEMD_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables SYSTEMD_CFLAGS +and SYSTEMD_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see . +See \`config.log' for more details" "$LINENO" 5; } +else + SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS + SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi + +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +$as_echo_n "checking for SYSTEMD... " >&6; } + +if test -n "$SYSTEMD_CFLAGS"; then + pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$SYSTEMD_LIBS"; then + pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + else + SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$SYSTEMD_PKG_ERRORS" >&5 + + as_fn_error $? 
"Package requirements (libsystemd >= 209) were not met: $SYSTEMD_PKG_ERRORS @@ -9285,6 +9583,14 @@ else SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi + +else + SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS + SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } diff -Nru xen-4.6.0/tools/console/Makefile xen-4.6.5/tools/console/Makefile --- xen-4.6.0/tools/console/Makefile 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/console/Makefile 2017-03-07 16:19:05.000000000 +0000 @@ -29,7 +29,8 @@ xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c)) $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS) $(LDLIBS_xenconsoled) $(APPEND_LDFLAGS) -xenconsole: client/_paths.h $(patsubst %.c,%.o,$(wildcard client/*.c)) +client/main.o: client/_paths.h +xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c)) $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS) $(LDLIBS_xenconsole) $(APPEND_LDFLAGS) genpath-target = $(call buildmakevars2header,client/_paths.h) diff -Nru xen-4.6.0/tools/firmware/hvmloader/e820.c xen-4.6.5/tools/firmware/hvmloader/e820.c --- xen-4.6.0/tools/firmware/hvmloader/e820.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/firmware/hvmloader/e820.c 2017-03-07 16:19:05.000000000 +0000 @@ -99,6 +99,7 @@ ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - memory_map.map[i].addr; memory_map.map[i].type = E820_RAM; + memory_map.nr_map++; } } diff -Nru xen-4.6.0/tools/firmware/hvmloader/util.c xen-4.6.5/tools/firmware/hvmloader/util.c --- xen-4.6.0/tools/firmware/hvmloader/util.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/firmware/hvmloader/util.c 2017-03-07 16:19:05.000000000 +0000 @@ -478,7 +478,7 @@ if ( align < 16 ) align = 16; - s = (scratch_start + align - 1) & ~(align - 1); + s = (scratch_start + align) & ~(align - 1); e = s + size - 1; BUG_ON(e < s); diff -Nru xen-4.6.0/tools/hotplug/FreeBSD/rc.d/xencommons.in xen-4.6.5/tools/hotplug/FreeBSD/rc.d/xencommons.in --- xen-4.6.0/tools/hotplug/FreeBSD/rc.d/xencommons.in 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/hotplug/FreeBSD/rc.d/xencommons.in 2017-03-07 16:19:05.000000000 +0000 @@ -11,18 +11,22 @@ export LD_LIBRARY_PATH name="xencommons" +rcvar="xencommons_enable" start_precmd="xen_precmd" start_cmd="xen_startcmd" stop_cmd="xen_stop" status_cmd="xen_status" extra_commands="status" -required_files="/dev/xen/privcmd" +required_files="/dev/xen/xenstored" XENSTORED_PIDFILE="/var/run/xenstored.pid" XENCONSOLED_PIDFILE="/var/run/xenconsoled.pid" #XENCONSOLED_TRACE="/var/log/xen/xenconsole-trace.log" #XENSTORED_TRACE="/var/log/xen/xenstore-trace.log" +load_rc_config $name +: ${xencommons_enable:=no} + xen_precmd() { mkdir -p /var/run/xenstored || exit 1 @@ -116,5 +120,4 @@ fi } -load_rc_config $name run_rc_command "$1" diff -Nru xen-4.6.0/tools/hotplug/FreeBSD/rc.d/xendriverdomain.in xen-4.6.5/tools/hotplug/FreeBSD/rc.d/xendriverdomain.in --- xen-4.6.0/tools/hotplug/FreeBSD/rc.d/xendriverdomain.in 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/hotplug/FreeBSD/rc.d/xendriverdomain.in 2017-03-07 16:19:05.000000000 +0000 @@ -13,23 +13,21 @@ export LD_LIBRARY_PATH name="xendriverdomain" -start_precmd="xendriverdomain_precmd" -start_cmd="xendriverdomain_startcmd" +rcvar="xendriverdomain_enable" +start_cmd="xendriverdomain_start" stop_cmd="xendriverdomain_stop" extra_commands="" XLDEVD_PIDFILE="/var/run/xldevd.pid" -xendriverdomain_precmd() -{ - : -} +load_rc_config 
$name +: ${xendriverdomain_enable:=no} -xendriverdomain_startcmd() +xendriverdomain_start() { printf "Starting xenservices: xl devd." - ${sbindir}/xl devd --pidfile=$XLDEVD_PIDFILE ${XLDEVD_ARGS} + PATH="${bindir}:${sbindir}:$PATH" ${sbindir}/xl devd --pidfile ${XLDEVD_PIDFILE} ${XLDEVD_ARGS} printf "\n" } @@ -40,9 +38,8 @@ rc_pid=$(check_pidfile ${XLDEVD_PIDFILE} ${sbindir}/xl) - kill -${sig_stop:-TERM} $rc_pids - wait_for_pids $rc_pids + kill -${sig_stop:-TERM} $rc_pid + wait_for_pids $rc_pid } -load_rc_config $name run_rc_command "$1" diff -Nru xen-4.6.0/tools/libvchan/io.c xen-4.6.5/tools/libvchan/io.c --- xen-4.6.0/tools/libvchan/io.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libvchan/io.c 2017-03-07 16:19:05.000000000 +0000 @@ -117,6 +117,7 @@ static inline int raw_get_data_ready(struct libxenvchan *ctrl) { uint32_t ready = rd_prod(ctrl) - rd_cons(ctrl); + xen_mb(); /* Ensure 'ready' is read only once. */ if (ready > rd_ring_size(ctrl)) /* We have no way to return errors. Locking up the ring is * better than the alternatives. */ @@ -158,6 +159,7 @@ static inline int raw_get_buffer_space(struct libxenvchan *ctrl) { uint32_t ready = wr_ring_size(ctrl) - (wr_prod(ctrl) - wr_cons(ctrl)); + xen_mb(); /* Ensure 'ready' is read only once. */ if (ready > wr_ring_size(ctrl)) /* We have no way to return errors. Locking up the ring is * better than the alternatives. */ diff -Nru xen-4.6.0/tools/libxc/xc_cpuid_x86.c xen-4.6.5/tools/libxc/xc_cpuid_x86.c --- xen-4.6.0/tools/libxc/xc_cpuid_x86.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxc/xc_cpuid_x86.c 2017-03-07 16:19:05.000000000 +0000 @@ -123,7 +123,8 @@ * ECX[15:12] is ApicIdCoreSize: ECX[7:0] is NumberOfCores (minus one). * Update to reflect vLAPIC_ID = vCPU_ID * 2. */ - regs[2] = ((regs[2] & 0xf000u) + 1) | ((regs[2] & 0xffu) << 1) | 1u; + regs[2] = ((regs[2] + (1u << 12)) & 0xf000u) | + ((regs[2] & 0xffu) << 1) | 1u; break; case 0x8000000a: { diff -Nru xen-4.6.0/tools/libxc/xc_dom_bzimageloader.c xen-4.6.5/tools/libxc/xc_dom_bzimageloader.c --- xen-4.6.0/tools/libxc/xc_dom_bzimageloader.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxc/xc_dom_bzimageloader.c 2017-03-07 16:19:05.000000000 +0000 @@ -482,7 +482,7 @@ if ( !dst_len ) { msg = "Error registering stream output"; - if ( xc_dom_register_external(dom, out_buf, out_len) ) + if ( xc_dom_register_external(dom, out_buf, *size) ) break; return 0; diff -Nru xen-4.6.0/tools/libxc/xc_sr_save_x86_hvm.c xen-4.6.5/tools/libxc/xc_sr_save_x86_hvm.c --- xen-4.6.0/tools/libxc/xc_sr_save_x86_hvm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxc/xc_sr_save_x86_hvm.c 2017-03-07 16:19:05.000000000 +0000 @@ -76,6 +76,7 @@ HVM_PARAM_VM_GENERATION_ID_ADDR, HVM_PARAM_IOREQ_SERVER_PFN, HVM_PARAM_NR_IOREQ_SERVER_PAGES, + HVM_PARAM_X87_FIP_WIDTH, }; xc_interface *xch = ctx->xch; diff -Nru xen-4.6.0/tools/libxc/xc_sr_save_x86_pv.c xen-4.6.5/tools/libxc/xc_sr_save_x86_pv.c --- xen-4.6.0/tools/libxc/xc_sr_save_x86_pv.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxc/xc_sr_save_x86_pv.c 2017-03-07 16:19:05.000000000 +0000 @@ -695,9 +695,14 @@ #ifdef __i386__ if ( mfn == INVALID_MFN ) { - ERROR("PTE truncation detected. L%lu[%u] = %016"PRIx64, - type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte); - errno = E2BIG; + if ( !ctx->dominfo.paused ) + errno = EAGAIN; + else + { + ERROR("PTE truncation detected. 
L%lu[%u] = %016"PRIx64, + type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte); + errno = E2BIG; + } return -1; } #endif diff -Nru xen-4.6.0/tools/libxl/libxl.c xen-4.6.5/tools/libxl/libxl.c --- xen-4.6.0/tools/libxl/libxl.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl.c 2017-03-07 16:19:05.000000000 +0000 @@ -21,10 +21,10 @@ #define PAGE_TO_MEMKB(pages) ((pages) * 4) #define BACKEND_STRING_SIZE 5 -/* Utility to read backend xenstore keys */ -#define READ_BACKEND(tgc, subpath) ({ \ +/* Utility to read /libxl xenstore keys, from libxl_path */ +#define READ_LIBXLDEV(tgc, subpath) ({ \ rc = libxl__xs_read_checked(tgc, XBT_NULL, \ - GCSPRINTF("%s/" subpath, be_path), \ + GCSPRINTF("%s/" subpath, libxl_path), \ &tmp); \ if (rc) goto out; \ (char*)tmp; \ @@ -1336,9 +1336,10 @@ const char *wpath, const char *epath) { EGC_GC; libxl_evgen_disk_eject *evg = (void*)w; - char *backend; + const char *backend; char *value; char backend_type[BACKEND_STRING_SIZE+1]; + int rc; value = libxl__xs_read(gc, XBT_NULL, wpath); @@ -1354,9 +1355,16 @@ libxl_event *ev = NEW_EVENT(egc, DISK_EJECT, evg->domid, evg->user); libxl_device_disk *disk = &ev->u.disk_eject.disk; - backend = libxl__xs_read(gc, XBT_NULL, - libxl__sprintf(gc, "%.*s/backend", - (int)strlen(wpath)-6, wpath)); + rc = libxl__xs_read_checked(gc, XBT_NULL, evg->be_ptr_path, &backend); + if (rc) { + LIBXL__EVENT_DISASTER(egc, "xs_read failed reading be_ptr_path", + errno, LIBXL_EVENT_TYPE_DISK_EJECT); + return; + } + if (!backend) { + /* device has been removed, not simply ejected */ + return; + } sscanf(backend, "/local/domain/%d/backend/%" TOSTRING(BACKEND_STRING_SIZE) @@ -1373,8 +1381,7 @@ disk->pdev_path = strdup(""); /* xxx fixme malloc failure */ disk->format = LIBXL_DISK_FORMAT_EMPTY; /* this value is returned to the user: do not free right away */ - disk->vdev = xs_read(CTX->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/dev", backend), NULL); + disk->vdev = libxl__strdup(NOGC, evg->vdev); disk->removable = 1; disk->readwrite = 0; disk->is_cdrom = 1; @@ -1397,19 +1404,30 @@ evg->domid = guest_domid; LIBXL_LIST_INSERT_HEAD(&CTX->disk_eject_evgens, evg, entry); - evg->vdev = strdup(vdev); - if (!evg->vdev) { rc = ERROR_NOMEM; goto out; } - uint32_t domid = libxl_get_stubdom_id(ctx, guest_domid); if (!domid) domid = guest_domid; - path = libxl__sprintf(gc, "%s/device/vbd/%d/eject", + int devid = libxl__device_disk_dev_number(vdev, NULL, NULL); + + path = GCSPRINTF("%s/device/vbd/%d/eject", libxl__xs_get_dompath(gc, domid), - libxl__device_disk_dev_number(vdev, NULL, NULL)); + devid); if (!path) { rc = ERROR_NOMEM; goto out; } + const char *libxl_path = GCSPRINTF("%s/device/vbd/%d", + libxl__xs_libxl_path(gc, domid), + devid); + evg->be_ptr_path = libxl__sprintf(NOGC, "%s/backend", libxl_path); + + const char *configured_vdev; + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/dev", libxl_path), &configured_vdev); + if (rc) goto out; + + evg->vdev = libxl__strdup(NOGC, configured_vdev); + rc = libxl__ev_xswatch_register(gc, &evg->watch, disk_eject_xswatch_callback, path); if (rc) goto out; @@ -1436,6 +1454,7 @@ libxl__ev_xswatch_deregister(gc, &evg->watch); free(evg->vdev); + free(evg->be_ptr_path); free(evg); CTX_UNLOCK; @@ -1794,7 +1813,7 @@ switch (type) { case LIBXL_CONSOLE_TYPE_SERIAL: - tty_path = GCSPRINTF("%s/serial/0/tty", dom_path); + tty_path = GCSPRINTF("%s/serial/%d/tty", dom_path, cons_num); break; case LIBXL_CONSOLE_TYPE_PV: if (cons_num == 0) @@ -2008,15 +2027,16 @@ /* common function to get next device id */ 
static int libxl__device_nextid(libxl__gc *gc, uint32_t domid, char *device) { - char *dompath, **l; + char *libxl_dom_path, **l; unsigned int nb; int nextid = -1; - if (!(dompath = libxl__xs_get_dompath(gc, domid))) + if (!(libxl_dom_path = libxl__xs_libxl_path(gc, domid))) return nextid; l = libxl__xs_directory(gc, XBT_NULL, - GCSPRINTF("%s/device/%s", dompath, device), &nb); + GCSPRINTF("%s/device/%s", libxl_dom_path, device), + &nb); if (l == NULL || nb == 0) nextid = 0; else @@ -2179,14 +2199,15 @@ GC_INIT(ctx); libxl_device_vtpm* vtpms = NULL; - char* fe_path = NULL; + char *libxl_path; char** dir = NULL; unsigned int ndirs = 0; + int rc; *num = 0; - fe_path = libxl__sprintf(gc, "%s/device/vtpm", libxl__xs_get_dompath(gc, domid)); - dir = libxl__xs_directory(gc, XBT_NULL, fe_path, &ndirs); + libxl_path = GCSPRINTF("%s/device/vtpm", libxl__xs_libxl_path(gc, domid)); + dir = libxl__xs_directory(gc, XBT_NULL, libxl_path, &ndirs); if (dir && ndirs) { vtpms = malloc(sizeof(*vtpms) * ndirs); libxl_device_vtpm* vtpm; @@ -2195,18 +2216,17 @@ char* tmp; const char* be_path = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/%s/backend", - fe_path, *dir)); + libxl_path, *dir)); libxl_device_vtpm_init(vtpm); vtpm->devid = atoi(*dir); - tmp = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/%s/backend-id", - fe_path, *dir)); - vtpm->backend_domid = atoi(tmp); + rc = libxl__backendpath_parse_domid(gc, be_path, + &vtpm->backend_domid); + if (rc) return NULL; - tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/uuid", be_path)); + tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/uuid", libxl_path)); if (tmp) { if(libxl_uuid_from_string(&(vtpm->uuid), tmp)) { LOG(ERROR, "%s/uuid is a malformed uuid?? (%s) Probably a bug!!\n", be_path, tmp); @@ -2228,7 +2248,7 @@ libxl_vtpminfo *vtpminfo) { GC_INIT(ctx); - char *dompath, *vtpmpath; + char *libxl_path, *dompath, *vtpmpath; char *val; int rc = 0; @@ -2237,18 +2257,17 @@ vtpminfo->devid = vtpm->devid; vtpmpath = GCSPRINTF("%s/device/vtpm/%d", dompath, vtpminfo->devid); + libxl_path = GCSPRINTF("%s/device/vtpm/%d", + libxl__xs_libxl_path(gc, domid), vtpminfo->devid); vtpminfo->backend = xs_read(ctx->xsh, XBT_NULL, - GCSPRINTF("%s/backend", vtpmpath), NULL); + GCSPRINTF("%s/backend", libxl_path), NULL); if (!vtpminfo->backend) { goto err; } - if(!libxl__xs_read(gc, XBT_NULL, vtpminfo->backend)) { - goto err; - } - val = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/backend-id", vtpmpath)); - vtpminfo->backend_id = val ? strtoul(val, NULL, 10) : -1; + rc = libxl__backendpath_parse_domid(gc, vtpminfo->backend, + &vtpminfo->backend_id); + if (rc) goto exit; val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", vtpmpath)); @@ -2263,14 +2282,11 @@ vtpminfo->rref = val ? strtoul(val, NULL, 10) : -1; vtpminfo->frontend = xs_read(ctx->xsh, XBT_NULL, - GCSPRINTF("%s/frontend", vtpminfo->backend), NULL); - - val = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/frontend-id", vtpminfo->backend)); - vtpminfo->frontend_id = val ? 
strtoul(val, NULL, 10) : -1; + GCSPRINTF("%s/frontend", libxl_path), NULL); + vtpminfo->frontend_id = domid; val = libxl__xs_read(gc, XBT_NULL, - GCSPRINTF("%s/uuid", vtpminfo->backend)); + GCSPRINTF("%s/uuid", libxl_path)); if(val == NULL) { LOG(ERROR, "%s/uuid does not exist!", vtpminfo->backend); goto err; @@ -2626,8 +2642,8 @@ device_disk_add(egc, domid, disk, aodev, NULL, NULL); } -static int libxl__device_disk_from_xs_be(libxl__gc *gc, - const char *be_path, +static int libxl__device_disk_from_xenstore(libxl__gc *gc, + const char *libxl_path, libxl_device_disk *disk) { libxl_ctx *ctx = libxl__gc_owner(gc); @@ -2637,15 +2653,27 @@ libxl_device_disk_init(disk); - rc = sscanf(be_path, "/local/domain/%d/", &disk->backend_domid); - if (rc != 1) { - LOG(ERROR, "Unable to fetch device backend domid from %s", be_path); - goto cleanup; + const char *backend_path; + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/backend", libxl_path), + &backend_path); + if (rc) goto out; + + if (!backend_path) { + LOG(ERROR, "disk %s does not exist (no backend path", libxl_path); + rc = ERROR_FAIL; + goto out; + } + + rc = libxl__backendpath_parse_domid(gc, backend_path, &disk->backend_domid); + if (rc) { + LOG(ERROR, "Unable to fetch device backend domid from %s", backend_path); + goto out; } /* "params" may not be present; but everything else must be. */ tmp = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/params", be_path), &len); + libxl__sprintf(gc, "%s/params", libxl_path), &len); if (tmp && strchr(tmp, ':')) { disk->pdev_path = strdup(strchr(tmp, ':') + 1); free(tmp); @@ -2655,31 +2683,31 @@ tmp = libxl__xs_read(gc, XBT_NULL, - libxl__sprintf(gc, "%s/type", be_path)); + libxl__sprintf(gc, "%s/type", libxl_path)); if (!tmp) { - LOG(ERROR, "Missing xenstore node %s/type", be_path); + LOG(ERROR, "Missing xenstore node %s/type", libxl_path); goto cleanup; } libxl_string_to_backend(ctx, tmp, &(disk->backend)); disk->vdev = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/dev", be_path), &len); + libxl__sprintf(gc, "%s/dev", libxl_path), &len); if (!disk->vdev) { - LOG(ERROR, "Missing xenstore node %s/dev", be_path); + LOG(ERROR, "Missing xenstore node %s/dev", libxl_path); goto cleanup; } tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf - (gc, "%s/removable", be_path)); + (gc, "%s/removable", libxl_path)); if (!tmp) { - LOG(ERROR, "Missing xenstore node %s/removable", be_path); + LOG(ERROR, "Missing xenstore node %s/removable", libxl_path); goto cleanup; } disk->removable = atoi(tmp); - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/mode", be_path)); + tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/mode", libxl_path)); if (!tmp) { - LOG(ERROR, "Missing xenstore node %s/mode", be_path); + LOG(ERROR, "Missing xenstore node %s/mode", libxl_path); goto cleanup; } if (!strcmp(tmp, "w")) @@ -2688,9 +2716,9 @@ disk->readwrite = 0; tmp = libxl__xs_read(gc, XBT_NULL, - libxl__sprintf(gc, "%s/device-type", be_path)); + libxl__sprintf(gc, "%s/device-type", libxl_path)); if (!tmp) { - LOG(ERROR, "Missing xenstore node %s/device-type", be_path); + LOG(ERROR, "Missing xenstore node %s/device-type", libxl_path); goto cleanup; } disk->is_cdrom = !strcmp(tmp, "cdrom"); @@ -2699,15 +2727,17 @@ return 0; cleanup: + rc = ERROR_FAIL; + out: libxl_device_disk_dispose(disk); - return ERROR_FAIL; + return rc; } int libxl_vdev_to_device_disk(libxl_ctx *ctx, uint32_t domid, const char *vdev, libxl_device_disk *disk) { GC_INIT(ctx); - char *dompath, *path; + char *dom_xl_path, *libxl_path; 
int devid = libxl__device_disk_dev_number(vdev, NULL, NULL); int rc = ERROR_FAIL; @@ -2716,39 +2746,34 @@ libxl_device_disk_init(disk); - dompath = libxl__xs_get_dompath(gc, domid); - if (!dompath) { + dom_xl_path = libxl__xs_libxl_path(gc, domid); + if (!dom_xl_path) { goto out; } - path = libxl__xs_read(gc, XBT_NULL, - libxl__sprintf(gc, "%s/device/vbd/%d/backend", - dompath, devid)); - if (!path) - goto out; + libxl_path = GCSPRINTF("%s/device/vbd/%d", dom_xl_path, devid); - rc = libxl__device_disk_from_xs_be(gc, path, disk); + rc = libxl__device_disk_from_xenstore(gc, libxl_path, disk); out: GC_FREE; return rc; } -static int libxl__append_disk_list_of_type(libxl__gc *gc, +static int libxl__append_disk_list(libxl__gc *gc, uint32_t domid, - const char *type, libxl_device_disk **disks, int *ndisks) { - char *be_path = NULL; + char *libxl_dir_path = NULL; char **dir = NULL; unsigned int n = 0; libxl_device_disk *pdisk = NULL, *pdisk_end = NULL; int rc=0; int initial_disks = *ndisks; - be_path = libxl__sprintf(gc, "%s/backend/%s/%d", - libxl__xs_get_dompath(gc, 0), type, domid); - dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); + libxl_dir_path = GCSPRINTF("%s/device/vbd", + libxl__xs_libxl_path(gc, domid)); + dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); if (dir && n) { libxl_device_disk *tmp; tmp = realloc(*disks, sizeof (libxl_device_disk) * (*ndisks + n)); @@ -2759,10 +2784,9 @@ pdisk_end = *disks + initial_disks + n; for (; pdisk < pdisk_end; pdisk++, dir++) { const char *p; - p = libxl__sprintf(gc, "%s/%s", be_path, *dir); - if ((rc=libxl__device_disk_from_xs_be(gc, p, pdisk))) + p = libxl__sprintf(gc, "%s/%s", libxl_dir_path, *dir); + if ((rc=libxl__device_disk_from_xenstore(gc, p, pdisk))) goto out; - pdisk->backend_domid = 0; *ndisks += 1; } } @@ -2778,13 +2802,7 @@ *num = 0; - rc = libxl__append_disk_list_of_type(gc, domid, "vbd", &disks, num); - if (rc) goto out_err; - - rc = libxl__append_disk_list_of_type(gc, domid, "tap", &disks, num); - if (rc) goto out_err; - - rc = libxl__append_disk_list_of_type(gc, domid, "qdisk", &disks, num); + rc = libxl__append_disk_list(gc, domid, &disks, num); if (rc) goto out_err; GC_FREE; @@ -2804,35 +2822,45 @@ libxl_device_disk *disk, libxl_diskinfo *diskinfo) { GC_INIT(ctx); - char *dompath, *diskpath; + char *dompath, *fe_path, *libxl_path; char *val; + int rc; + + diskinfo->backend = NULL; dompath = libxl__xs_get_dompath(gc, domid); diskinfo->devid = libxl__device_disk_dev_number(disk->vdev, NULL, NULL); /* tap devices entries in xenstore are written as vbd devices. */ - diskpath = libxl__sprintf(gc, "%s/device/vbd/%d", dompath, diskinfo->devid); + fe_path = GCSPRINTF("%s/device/vbd/%d", dompath, diskinfo->devid); + libxl_path = GCSPRINTF("%s/device/vbd/%d", + libxl__xs_libxl_path(gc, domid), diskinfo->devid); diskinfo->backend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/backend", diskpath), NULL); + GCSPRINTF("%s/backend", libxl_path), NULL); if (!diskinfo->backend) { GC_FREE; return ERROR_FAIL; } - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/backend-id", diskpath)); - diskinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/state", diskpath)); + rc = libxl__backendpath_parse_domid(gc, diskinfo->backend, + &diskinfo->backend_id); + if (rc) goto out; + + val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", fe_path)); diskinfo->state = val ? 
strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/event-channel", diskpath)); + val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/event-channel", fe_path)); diskinfo->evtch = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/ring-ref", diskpath)); + val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/ring-ref", fe_path)); diskinfo->rref = val ? strtoul(val, NULL, 10) : -1; diskinfo->frontend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/frontend", diskinfo->backend), NULL); - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend-id", diskinfo->backend)); - diskinfo->frontend_id = val ? strtoul(val, NULL, 10) : -1; + GCSPRINTF("%s/frontend", libxl_path), NULL); + diskinfo->frontend_id = domid; GC_FREE; return 0; + + out: + free(diskinfo->backend); + return rc; } int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk, @@ -2844,7 +2872,7 @@ libxl_domain_config d_config; int rc, dm_ver; libxl__device device; - const char * path; + const char *be_path, *libxl_path; char * tmp; libxl__domain_userdata_lock *lock = NULL; xs_transaction_t t = XBT_NULL; @@ -2911,7 +2939,8 @@ rc = libxl__device_from_disk(gc, domid, disk, &device); if (rc) goto out; - path = libxl__device_backend_path(gc, &device); + be_path = libxl__device_backend_path(gc, &device); + libxl_path = libxl__device_libxl_path(gc, &device); insert = flexarray_make(gc, 4, 1); @@ -2950,18 +2979,22 @@ for (;;) { rc = libxl__xs_transaction_start(gc, &t); if (rc) goto out; - /* Sanity check: make sure the backend exists before writing here */ - tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/frontend", path)); + /* Sanity check: make sure the device exists before writing here */ + tmp = libxl__xs_read(gc, t, GCSPRINTF("%s/frontend", libxl_path)); if (!tmp) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Internal error: %s does not exist", - libxl__sprintf(gc, "%s/frontend", path)); + libxl__sprintf(gc, "%s/frontend", libxl_path)); rc = ERROR_FAIL; goto out; } - rc = libxl__xs_writev(gc, t, path, - libxl__xs_kvs_of_flexarray(gc, empty, empty->count)); + char **kvs = libxl__xs_kvs_of_flexarray(gc, empty, empty->count); + + rc = libxl__xs_writev(gc, t, be_path, kvs); + if (rc) goto out; + + rc = libxl__xs_writev(gc, t, libxl_path, kvs); if (rc) goto out; rc = libxl__xs_transaction_commit(gc, &t); @@ -2982,12 +3015,12 @@ for (;;) { rc = libxl__xs_transaction_start(gc, &t); if (rc) goto out; - /* Sanity check: make sure the backend exists before writing here */ - tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/frontend", path)); + /* Sanity check: make sure the device exists before writing here */ + tmp = libxl__xs_read(gc, t, GCSPRINTF("%s/frontend", libxl_path)); if (!tmp) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Internal error: %s does not exist", - libxl__sprintf(gc, "%s/frontend", path)); + libxl__sprintf(gc, "%s/frontend", libxl_path)); rc = ERROR_FAIL; goto out; } @@ -2995,8 +3028,12 @@ rc = libxl__set_domain_configuration(gc, domid, &d_config); if (rc) goto out; - rc = libxl__xs_writev(gc, t, path, - libxl__xs_kvs_of_flexarray(gc, insert, insert->count)); + char **kvs = libxl__xs_kvs_of_flexarray(gc, insert, insert->count); + + rc = libxl__xs_writev(gc, t, be_path, kvs); + if (rc) goto out; + + rc = libxl__xs_writev(gc, t, libxl_path, kvs); if (rc) goto out; rc = libxl__xs_transaction_commit(gc, &t); @@ -3031,7 +3068,7 @@ { const char *blkdev_start = (const char *) get_vdev_user; int devid = 0, disk = 0, part = 0; - char 
*dompath = libxl__xs_get_dompath(gc, LIBXL_TOOLSTACK_DOMID); + char *libxl_dom_path = libxl__xs_libxl_path(gc, LIBXL_TOOLSTACK_DOMID); libxl__device_disk_dev_number(blkdev_start, &disk, &part); if (part != 0) { @@ -3046,7 +3083,7 @@ return NULL; if (libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/device/vbd/%d/backend", - dompath, devid)) == NULL) { + libxl_dom_path, devid)) == NULL) { if (errno == ENOENT) return libxl__devid_to_localdev(gc, devid); else @@ -3433,8 +3470,8 @@ return; } -static int libxl__device_nic_from_xs_be(libxl__gc *gc, - const char *be_path, +static int libxl__device_nic_from_xenstore(libxl__gc *gc, + const char *libxl_path, libxl_device_nic *nic) { const char *tmp; @@ -3442,15 +3479,27 @@ libxl_device_nic_init(nic); - tmp = READ_BACKEND(gc, "handle"); + tmp = READ_LIBXLDEV(gc, "handle"); if (tmp) nic->devid = atoi(tmp); else nic->devid = 0; + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/backend", libxl_path), &tmp); + if (rc) goto out; + + if (!tmp) { + LOG(ERROR, "nic %s does not exist (no backend path)", libxl_path); + rc = ERROR_FAIL; + goto out; + } + rc = libxl__backendpath_parse_domid(gc, tmp, &nic->backend_domid); + if (rc) goto out; + /* nic->mtu = */ - tmp = READ_BACKEND(gc, "mac"); + tmp = READ_LIBXLDEV(gc, "mac"); if (tmp) { rc = libxl__parse_mac(tmp, nic->mac); if (rc) goto out; @@ -3458,12 +3507,12 @@ memset(nic->mac, 0, sizeof(nic->mac)); } - nic->ip = READ_BACKEND(NOGC, "ip"); - nic->bridge = READ_BACKEND(NOGC, "bridge"); - nic->script = READ_BACKEND(NOGC, "script"); + nic->ip = READ_LIBXLDEV(NOGC, "ip"); + nic->bridge = READ_LIBXLDEV(NOGC, "bridge"); + nic->script = READ_LIBXLDEV(NOGC, "script"); /* vif_ioemu nics use the same xenstore entries as vif interfaces */ - tmp = READ_BACKEND(gc, "type"); + tmp = READ_LIBXLDEV(gc, "type"); if (tmp) { rc = libxl_nic_type_from_string(tmp, &nic->nictype); if (rc) goto out; @@ -3482,21 +3531,17 @@ int devid, libxl_device_nic *nic) { GC_INIT(ctx); - char *dompath, *path; + char *libxl_dom_path, *libxl_path; int rc = ERROR_FAIL; libxl_device_nic_init(nic); - dompath = libxl__xs_get_dompath(gc, domid); - if (!dompath) + libxl_dom_path = libxl__xs_libxl_path(gc, domid); + if (!libxl_dom_path) goto out; - path = libxl__xs_read(gc, XBT_NULL, - libxl__sprintf(gc, "%s/device/vif/%d/backend", - dompath, devid)); - if (!path) - goto out; + libxl_path = GCSPRINTF("%s/device/vif/%d", libxl_dom_path, devid); - rc = libxl__device_nic_from_xs_be(gc, path, nic); + rc = libxl__device_nic_from_xenstore(gc, libxl_path, nic); if (rc) goto out; rc = 0; @@ -3505,21 +3550,20 @@ return rc; } -static int libxl__append_nic_list_of_type(libxl__gc *gc, +static int libxl__append_nic_list(libxl__gc *gc, uint32_t domid, - const char *type, libxl_device_nic **nics, int *nnics) { - char *be_path = NULL; + char *libxl_dir_path = NULL; char **dir = NULL; unsigned int n = 0; libxl_device_nic *pnic = NULL, *pnic_end = NULL; int rc; - be_path = libxl__sprintf(gc, "%s/backend/%s/%d", - libxl__xs_get_dompath(gc, 0), type, domid); - dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); + libxl_dir_path = GCSPRINTF("%s/device/vif", + libxl__xs_libxl_path(gc, domid)); + dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); if (dir && n) { libxl_device_nic *tmp; tmp = realloc(*nics, sizeof (libxl_device_nic) * (*nnics + n)); @@ -3530,10 +3574,9 @@ pnic_end = *nics + *nnics + n; for (; pnic < pnic_end; pnic++, dir++) { const char *p; - p = libxl__sprintf(gc, "%s/%s", be_path, *dir); - rc = libxl__device_nic_from_xs_be(gc, p, pnic); + p = 
GCSPRINTF("%s/%s", libxl_dir_path, *dir); + rc = libxl__device_nic_from_xenstore(gc, p, pnic); if (rc) goto out; - pnic->backend_domid = 0; } *nnics += n; } @@ -3551,7 +3594,7 @@ *num = 0; - rc = libxl__append_nic_list_of_type(gc, domid, "vif", &nics, num); + rc = libxl__append_nic_list(gc, domid, &nics, num); if (rc) goto out_err; GC_FREE; @@ -3571,22 +3614,27 @@ libxl_device_nic *nic, libxl_nicinfo *nicinfo) { GC_INIT(ctx); - char *dompath, *nicpath; + char *dompath, *nicpath, *libxl_path; char *val; + int rc; dompath = libxl__xs_get_dompath(gc, domid); nicinfo->devid = nic->devid; - nicpath = libxl__sprintf(gc, "%s/device/vif/%d", dompath, nicinfo->devid); + nicpath = GCSPRINTF("%s/device/vif/%d", dompath, nicinfo->devid); + libxl_path = GCSPRINTF("%s/device/vif/%d", + libxl__xs_libxl_path(gc, domid), nicinfo->devid); nicinfo->backend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/backend", nicpath), NULL); + GCSPRINTF("%s/backend", libxl_path), NULL); if (!nicinfo->backend) { GC_FREE; return ERROR_FAIL; } - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/backend-id", nicpath)); - nicinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/state", nicpath)); + rc = libxl__backendpath_parse_domid(gc, nicinfo->backend, + &nicinfo->backend_id); + if (rc) goto out; + + val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", nicpath)); nicinfo->state = val ? strtoul(val, NULL, 10) : -1; val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/event-channel", nicpath)); nicinfo->evtch = val ? strtoul(val, NULL, 10) : -1; @@ -3594,13 +3642,13 @@ nicinfo->rref_tx = val ? strtoul(val, NULL, 10) : -1; val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/rx-ring-ref", nicpath)); nicinfo->rref_rx = val ? strtoul(val, NULL, 10) : -1; - nicinfo->frontend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/frontend", nicinfo->backend), NULL); - val = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend-id", nicinfo->backend)); - nicinfo->frontend_id = val ? 
strtoul(val, NULL, 10) : -1; + nicinfo->frontend = libxl__strdup(NOGC, nicpath); + nicinfo->frontend_id = domid; + rc = 0; + out: GC_FREE; - return 0; + return rc; } const char *libxl__device_nic_devname(libxl__gc *gc, @@ -3661,6 +3709,8 @@ if (console->name) { flexarray_append(ro_front, "name"); flexarray_append(ro_front, console->name); + flexarray_append(back, "name"); + flexarray_append(back, console->name); } if (console->connection) { flexarray_append(back, "connection"); @@ -3772,8 +3822,8 @@ return 0; } -static int libxl__device_channel_from_xs_be(libxl__gc *gc, - const char *be_path, +static int libxl__device_channel_from_xenstore(libxl__gc *gc, + const char *libxl_path, libxl_device_channel *channel) { const char *tmp; @@ -3781,14 +3831,14 @@ libxl_device_channel_init(channel); - /* READ_BACKEND is from libxl__device_nic_from_xs_be above */ - channel->name = READ_BACKEND(NOGC, "name"); - tmp = READ_BACKEND(gc, "connection"); + /* READ_BACKEND is from libxl__device_nic_from_xenstore above */ + channel->name = READ_LIBXLDEV(NOGC, "name"); + tmp = READ_LIBXLDEV(gc, "connection"); if (!strcmp(tmp, "pty")) { channel->connection = LIBXL_CHANNEL_CONNECTION_PTY; } else if (!strcmp(tmp, "socket")) { channel->connection = LIBXL_CHANNEL_CONNECTION_SOCKET; - channel->u.socket.path = READ_BACKEND(NOGC, "path"); + channel->u.socket.path = READ_LIBXLDEV(NOGC, "path"); } else { rc = ERROR_INVAL; goto out; @@ -3799,34 +3849,32 @@ return rc; } -static int libxl__append_channel_list_of_type(libxl__gc *gc, +static int libxl__append_channel_list(libxl__gc *gc, uint32_t domid, - const char *type, libxl_device_channel **channels, int *nchannels) { - char *fe_path = NULL, *be_path = NULL; + char *libxl_dir_path = NULL; char **dir = NULL; unsigned int n = 0, devid = 0; libxl_device_channel *next = NULL; int rc = 0, i; - fe_path = GCSPRINTF("%s/device/%s", - libxl__xs_get_dompath(gc, domid), type); - dir = libxl__xs_directory(gc, XBT_NULL, fe_path, &n); + libxl_dir_path = GCSPRINTF("%s/device/console", + libxl__xs_libxl_path(gc, domid)); + dir = libxl__xs_directory(gc, XBT_NULL, libxl_dir_path, &n); if (!dir || !n) goto out; for (i = 0; i < n; i++) { - const char *p, *name; + const char *libxl_path, *name; libxl_device_channel *tmp; - p = libxl__sprintf(gc, "%s/%s", fe_path, dir[i]); - name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", p)); + libxl_path = GCSPRINTF("%s/%s", libxl_dir_path, dir[i]); + name = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name", libxl_path)); /* 'channels' are consoles with names, so ignore all consoles without names */ if (!name) continue; - be_path = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/backend", p)); tmp = realloc(*channels, sizeof(libxl_device_channel) * (*nchannels + devid + 1)); if (!tmp) { @@ -3835,7 +3883,7 @@ } *channels = tmp; next = *channels + *nchannels + devid; - rc = libxl__device_channel_from_xs_be(gc, be_path, next); + rc = libxl__device_channel_from_xenstore(gc, libxl_path, next); if (rc) goto out; next->devid = devid; devid++; @@ -3857,7 +3905,7 @@ *num = 0; - rc = libxl__append_channel_list_of_type(gc, domid, "console", &channels, num); + rc = libxl__append_channel_list(gc, domid, &channels, num); if (rc) goto out_err; GC_FREE; @@ -3878,31 +3926,32 @@ libxl_channelinfo *channelinfo) { GC_INIT(ctx); - char *dompath, *fe_path; + char *dompath, *fe_path, *libxl_path; char *val; + int rc; dompath = libxl__xs_get_dompath(gc, domid); channelinfo->devid = channel->devid; - fe_path = libxl__sprintf(gc, "%s/device/console/%d", dompath, - 
channelinfo->devid + 1); + fe_path = GCSPRINTF("%s/device/console/%d", dompath, + channelinfo->devid + 1); + libxl_path = GCSPRINTF("%s/device/console/%d", + libxl__xs_libxl_path(gc, domid), + channelinfo->devid + 1); channelinfo->backend = xs_read(ctx->xsh, XBT_NULL, - libxl__sprintf(gc, "%s/backend", - fe_path), NULL); + GCSPRINTF("%s/backend", libxl_path), NULL); if (!channelinfo->backend) { GC_FREE; return ERROR_FAIL; } - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/backend-id", fe_path)); - channelinfo->backend_id = val ? strtoul(val, NULL, 10) : -1; + rc = libxl__backendpath_parse_domid(gc, channelinfo->backend, + &channelinfo->backend_id); + if (rc) goto out; + val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/state", fe_path)); channelinfo->state = val ? strtoul(val, NULL, 10) : -1; - channelinfo->frontend = xs_read(ctx->xsh, XBT_NULL, - GCSPRINTF("%s/frontend", - channelinfo->backend), NULL); - val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/frontend-id", - channelinfo->backend)); - channelinfo->frontend_id = val ? strtoul(val, NULL, 10) : -1; + channelinfo->frontend = libxl__strdup(NOGC, fe_path); + channelinfo->frontend_id = domid; val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/ring-ref", fe_path)); channelinfo->rref = val ? strtoul(val, NULL, 10) : -1; val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/port", fe_path)); @@ -3912,13 +3961,36 @@ switch (channel->connection) { case LIBXL_CHANNEL_CONNECTION_PTY: val = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/tty", fe_path)); + /* + * It is obviously very wrong for this value to be in the + * frontend. But in XSA-175 we don't want to re-engineer + * this because other xenconsole code elsewhere (some + * even out of tree, perhaps) expects this node to be + * here. + * + * FE/pty is readonly for the guest. It always exists if + * FE does because libxl__device_console_add + * unconditionally creates it and nothing deletes it. + * + * The guest can delete the whole FE (which it has write + * privilege on) but the containing directories + * /local/GUEST[/device[/console]] are also RO for the + * guest. So if the guest deletes FE it cannot recreate + * it. + * + * Therefore the guest cannot cause FE/pty to contain bad + * data, although it can cause it to not exist. + */ + if (!val) val = "/NO-SUCH-PATH"; channelinfo->u.pty.path = strdup(val); break; default: break; } + rc = 0; + out: GC_FREE; - return 0; + return rc; } /******************************************************************************/ @@ -4815,7 +4887,7 @@ } rc = xc_domain_set_pod_target(ctx->xch, domid, - new_target_memkb / 4, NULL, NULL, NULL); + (new_target_memkb + LIBXL_MAXMEM_CONSTANT) / 4, NULL, NULL, NULL); if (rc != 0) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_domain_set_pod_target domid=%d, memkb=%d " @@ -6661,12 +6733,12 @@ LOG(ERROR, "fail to get memory target for domain %d", domid); goto out; } - /* Target memory in xenstore is different from what user has - * asked for. The difference is video_memkb. See - * libxl_set_memory_target. + + /* libxl__get_targetmem_fudge() calculates the difference from + * what is in xenstore to what we have in the domain build info. 
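A note on the xc_domain_set_pod_target change above: libxl counts memory in KiB while the hypercall takes a count of 4 KiB pages, and the fix adds the usual 1 MiB slack before converting. A minimal sketch of the arithmetic, not part of the patch, assuming LIBXL_MAXMEM_CONSTANT is 1024 KiB:

    #include <stdio.h>

    /* Illustrative only: memkb -> page conversion used by the PoD
     * target fix above; 1024 stands in for LIBXL_MAXMEM_CONSTANT. */
    int main(void)
    {
        unsigned long new_target_memkb = 1048576;   /* 1 GiB target */
        unsigned long maxmem_constant  = 1024;      /* 1 MiB slack  */
        unsigned long pages = (new_target_memkb + maxmem_constant) / 4;

        printf("PoD target = %lu pages\n", pages);  /* -> 262400 */
        return 0;
    }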
*/ d_config->b_info.target_memkb = target_memkb + - d_config->b_info.video_memkb; + libxl__get_targetmem_fudge(gc, &d_config->b_info); d_config->b_info.max_memkb = max_memkb; } diff -Nru xen-4.6.0/tools/libxl/libxl_create.c xen-4.6.5/tools/libxl/libxl_create.c --- xen-4.6.0/tools/libxl/libxl_create.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_create.c 2017-03-07 16:19:05.000000000 +0000 @@ -591,6 +591,8 @@ xs_rm(ctx->xsh, t, libxl_path); libxl__xs_mkdir(gc, t, libxl_path, noperm, ARRAY_SIZE(noperm)); + libxl__xs_mkdir(gc, t, GCSPRINTF("%s/device", libxl_path), + noperm, ARRAY_SIZE(noperm)); xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vm", dom_path), vm_path, strlen(vm_path)); rc = libxl__domain_rename(gc, *domid, 0, info->name, t); @@ -1283,7 +1285,6 @@ } case LIBXL_DOMAIN_TYPE_PV: { - int need_qemu = 0; libxl__device_console console; libxl__device device; @@ -1294,16 +1295,18 @@ init_console_info(gc, &console, 0); - need_qemu = libxl__need_xenpv_qemu(gc, 1, &console, + ret = libxl__need_xenpv_qemu(gc, 1, &console, d_config->num_vfbs, d_config->vfbs, d_config->num_disks, &d_config->disks[0], d_config->num_channels, &d_config->channels[0]); + if (ret < 0) + goto error_out; console.backend_domid = state->console_domid; libxl__device_console_add(gc, domid, &console, state, &device); libxl__device_console_dispose(&console); - if (need_qemu) { + if (ret) { dcs->dmss.dm.guest_domid = domid; libxl__spawn_local_dm(egc, &dcs->dmss.dm); return; @@ -1484,6 +1487,9 @@ libxl_domain_config *const d_config = dcs->guest_config; libxl_domain_config *d_config_saved = &dcs->guest_config_saved; + libxl__file_reference_unmap(&dcs->build_state.pv_kernel); + libxl__file_reference_unmap(&dcs->build_state.pv_ramdisk); + if (!rc && d_config->b_info.exec_ssidref) rc = xc_flask_relabel_domain(CTX->xch, dcs->guest_domid, d_config->b_info.exec_ssidref); diff -Nru xen-4.6.0/tools/libxl/libxl_device.c xen-4.6.5/tools/libxl/libxl_device.c --- xen-4.6.0/tools/libxl/libxl_device.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_device.c 2017-03-07 16:19:05.000000000 +0000 @@ -40,12 +40,21 @@ device->domid, device->devid); } +char *libxl__device_libxl_path(libxl__gc *gc, libxl__device *device) +{ + char *libxl_dom_path = libxl__xs_libxl_path(gc, device->domid); + + return GCSPRINTF("%s/device/%s/%d", libxl_dom_path, + libxl__device_kind_to_string(device->kind), + device->devid); +} + /* Returns 1 if device exists, 0 if not, ERROR_* (<0) on error. 
*/ int libxl__device_exists(libxl__gc *gc, xs_transaction_t t, libxl__device *device) { int rc; - char *be_path = libxl__device_backend_path(gc, device); + char *be_path = libxl__device_libxl_path(gc, device); const char *dir; rc = libxl__xs_read_checked(gc, t, be_path, &dir); @@ -105,14 +114,16 @@ libxl__device *device, char **bents, char **fents, char **ro_fents) { libxl_ctx *ctx = libxl__gc_owner(gc); - char *frontend_path, *backend_path; + char *frontend_path, *backend_path, *libxl_path; struct xs_permissions frontend_perms[2]; struct xs_permissions ro_frontend_perms[2]; struct xs_permissions backend_perms[2]; int create_transaction = t == XBT_NULL; + int rc; frontend_path = libxl__device_frontend_path(gc, device); backend_path = libxl__device_backend_path(gc, device); + libxl_path = libxl__device_libxl_path(gc, device); frontend_perms[0].id = device->domid; frontend_perms[0].perms = XS_PERM_NONE; @@ -127,8 +138,22 @@ retry_transaction: if (create_transaction) t = xs_transaction_start(ctx->xsh); + /* FIXME: read frontend_path and check state before removing stuff */ + rc = libxl__xs_rm_checked(gc, t, libxl_path); + if (rc) goto out; + + rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/frontend",libxl_path), + frontend_path); + if (rc) goto out; + + rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/backend",libxl_path), + backend_path); + if (rc) goto out; + + /* xxx much of this function lacks error checks! */ + if (fents || ro_fents) { xs_rm(ctx->xsh, t, frontend_path); xs_mkdir(ctx->xsh, t, frontend_path); @@ -160,6 +185,29 @@ xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), frontend_path, strlen(frontend_path)); libxl__xs_writev(gc, t, backend_path, bents); + + /* + * We make a copy of everything for the backend in the libxl + * path as well. This means we don't need to trust the + * backend. Ideally this information would not be used and we + * would use the information from the json configuration + * instead. But there are still places in libxl that try to + * reconstruct a config from xenstore. + * + * This duplication will typically produce duplicate keys + * which will go out of date, but that's OK because nothing + * reads those. For example, there is usually + * /libxl/$guest/device/$kind/$devid/state + * which starts out containing XenbusStateInitialising ("1") + * just like the copy in + * /local/domain/$driverdom/backend/$guest/$kind/$devid/state + * but which won't ever be updated. + * + * This duplication is superfluous and messy but as discussed + * the proper fix is more intrusive than we want to do now.
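To make the mirrored layout described in the comment above concrete: for a hypothetical guest domid 7 with one vbd device (devid 51712) whose backend lives in dom0, the three xenstore paths look as below. A small standalone sketch, not part of the patch, with hypothetical values:

    #include <stdio.h>

    /* Illustrative only: frontend, backend and libxl mirror paths for
     * a hypothetical vbd device (guest 7, backend 0, devid 51712),
     * following the conventions used by the patch. */
    int main(void)
    {
        unsigned guest = 7, backend = 0, devid = 51712;
        char fe[64], be[64], lx[64];

        snprintf(fe, sizeof(fe), "/local/domain/%u/device/vbd/%u",
                 guest, devid);
        snprintf(be, sizeof(be), "/local/domain/%u/backend/vbd/%u/%u",
                 backend, guest, devid);
        snprintf(lx, sizeof(lx), "/libxl/%u/device/vbd/%u", guest, devid);

        printf("frontend: %s\nbackend:  %s\nlibxl:    %s\n", fe, be, lx);
        return 0;
    }

The libxl copy is the one the toolstack now reads back, so a compromised backend domain cannot feed libxl bogus device information.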
+ */ + rc = libxl__xs_writev(gc, t, libxl_path, bents); + if (rc) goto out; } if (!create_transaction) @@ -174,6 +222,11 @@ } } return 0; + + out: + if (create_transaction && t) + libxl__xs_transaction_abort(gc, &t); + return rc; } typedef struct { @@ -258,6 +311,21 @@ return 0; } +int libxl__backendpath_parse_domid(libxl__gc *gc, const char *be_path, + libxl_domid *domid_out) { + int r; + unsigned int domid_sc; + char delim_sc; + + r = sscanf(be_path, "/local/domain/%u%c", &domid_sc, &delim_sc); + if (!(r==2 && delim_sc=='/')) { + LOG(ERROR, "internal error: backend path %s unparseable!", be_path); + return ERROR_FAIL; + } + *domid_out = domid_sc; + return 0; +} + int libxl__device_disk_set_backend(libxl__gc *gc, libxl_device_disk *disk) { libxl_disk_backend ok; disk_try_backend_args a; @@ -572,6 +640,7 @@ { const char *be_path = libxl__device_backend_path(gc, dev); const char *fe_path = libxl__device_frontend_path(gc, dev); + const char *libxl_path = libxl__device_libxl_path(gc, dev); const char *tapdisk_path = GCSPRINTF("%s/%s", be_path, "tapdisk-params"); const char *tapdisk_params; xs_transaction_t t = 0; @@ -592,9 +661,10 @@ if (domid == LIBXL_TOOLSTACK_DOMID) { /* * The toolstack domain is in charge of removing the - * frontend path. + * frontend and libxl paths. */ libxl__xs_path_cleanup(gc, t, fe_path); + libxl__xs_path_cleanup(gc, t, libxl_path); } if (dev->backend_domid == domid) { /* @@ -638,7 +708,7 @@ libxl__multidev_begin(ao, multidev); multidev->callback = devices_remove_callback; - path = GCSPRINTF("/local/domain/%d/device", domid); + path = GCSPRINTF("/libxl/%d/device", domid); kinds = libxl__xs_directory(gc, XBT_NULL, path, &num_kinds); if (!kinds) { if (errno != ENOENT) { @@ -651,12 +721,12 @@ if (libxl__device_kind_from_string(kinds[i], &kind)) continue; - path = GCSPRINTF("/local/domain/%d/device/%s", domid, kinds[i]); + path = GCSPRINTF("/libxl/%d/device/%s", domid, kinds[i]); devs = libxl__xs_directory(gc, XBT_NULL, path, &num_dev_xsentries); if (!devs) continue; for (j = 0; j < num_dev_xsentries; j++) { - path = GCSPRINTF("/local/domain/%d/device/%s/%s/backend", + path = GCSPRINTF("/libxl/%d/device/%s/%s/backend", domid, kinds[i], devs[j]); path = libxl__xs_read(gc, XBT_NULL, path); GCNEW(dev); @@ -681,22 +751,6 @@ } } - /* console 0 frontend directory is not under /local/domain//device */ - path = GCSPRINTF("/local/domain/%d/console/backend", domid); - path = libxl__xs_read(gc, XBT_NULL, path); - GCNEW(dev); - if (path && strcmp(path, "") && - libxl__parse_backend_path(gc, path, dev) == 0) { - dev->domid = domid; - dev->kind = LIBXL__DEVICE_KIND_CONSOLE; - dev->devid = 0; - - /* Currently console devices can be destroyed synchronously by just - * removing xenstore entries, this is what libxl__device_destroy does. - */ - libxl__device_destroy(gc, dev); - } - out: libxl__multidev_prepared(egc, multidev, rc); } diff -Nru xen-4.6.0/tools/libxl/libxl_dm.c xen-4.6.5/tools/libxl/libxl_dm.c --- xen-4.6.0/tools/libxl/libxl_dm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_dm.c 2017-03-07 16:19:05.000000000 +0000 @@ -363,6 +363,20 @@ return ERROR_FAIL; } +/* XSA-180 / CVE-2014-3672 + * + * The QEMU shipped with Xen has a bodge. It checks for + * XEN_QEMU_CONSOLE_LIMIT to see how much data QEMU is allowed + * to write to stderr. We set that to 1MB if it is not set by + * system administrator. 
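The sscanf convention used by libxl__backendpath_parse_domid above can be exercised standalone. A hedged sketch (the helper name and path below are local to the sketch, not libxl code):

    #include <stdio.h>

    /* Illustrative only: the same two-item sscanf trick as
     * libxl__backendpath_parse_domid. The %c conversion verifies that
     * a '/' immediately follows the domid, rejecting inputs such as
     * "/local/domain/0" or "/local/domain/0x". */
    static int parse_backend_domid(const char *be_path, unsigned *domid)
    {
        unsigned id;
        char delim;

        if (sscanf(be_path, "/local/domain/%u%c", &id, &delim) != 2 ||
            delim != '/')
            return -1;
        *domid = id;
        return 0;
    }

    int main(void)
    {
        unsigned domid;

        /* Hypothetical backend path for a guest's first disk. */
        if (parse_backend_domid("/local/domain/0/backend/vbd/7/51712",
                                &domid) == 0)
            printf("backend domid = %u\n", domid);
        return 0;
    }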
+ */ +static void libxl__set_qemu_env_for_xsa_180(libxl__gc *gc, + flexarray_t *dm_envs) +{ + if (getenv("XEN_QEMU_CONSOLE_LIMIT")) return; + flexarray_append_pair(dm_envs, "XEN_QEMU_CONSOLE_LIMIT", "1048576"); +} + const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) { const libxl_vnc_info *vnc = NULL; @@ -413,6 +427,8 @@ dm_args = flexarray_make(gc, 16, 1); dm_envs = flexarray_make(gc, 16, 1); + libxl__set_qemu_env_for_xsa_180(gc, dm_envs); + flexarray_vappend(dm_args, dm, "-d", libxl__sprintf(gc, "%d", domid), NULL); @@ -726,6 +742,8 @@ dm_args = flexarray_make(gc, 16, 1); dm_envs = flexarray_make(gc, 16, 1); + libxl__set_qemu_env_for_xsa_180(gc, dm_envs); + flexarray_vappend(dm_args, dm, "-xen-domid", libxl__sprintf(gc, "%d", guest_domid), NULL); @@ -1872,8 +1890,8 @@ void libxl__spawn_qdisk_backend(libxl__egc *egc, libxl__dm_spawn_state *dmss) { STATE_AO_GC(dmss->spawn.ao); - flexarray_t *dm_args; - char **args; + flexarray_t *dm_args, *dm_envs; + char **args, **envs; const char *dm; int logfile_w, null = -1, rc; uint32_t domid = dmss->guest_domid; @@ -1882,6 +1900,8 @@ dm = qemu_xen_path(gc); dm_args = flexarray_make(gc, 15, 1); + dm_envs = flexarray_make(gc, 1, 1); + flexarray_vappend(dm_args, dm, "-xen-domid", GCSPRINTF("%d", domid), NULL); flexarray_append(dm_args, "-xen-attach"); @@ -1895,6 +1915,9 @@ flexarray_append(dm_args, NULL); args = (char **) flexarray_contents(dm_args); + libxl__set_qemu_env_for_xsa_180(gc, dm_envs); + envs = (char **) flexarray_contents(dm_envs); + logfile_w = libxl__create_qemu_logfile(gc, GCSPRINTF("qdisk-%u", domid)); if (logfile_w < 0) { rc = logfile_w; @@ -1932,7 +1955,7 @@ goto error; if (!rc) { /* inner child */ setsid(); - libxl__exec(gc, null, logfile_w, logfile_w, dm, args, NULL); + libxl__exec(gc, null, logfile_w, logfile_w, dm, args, envs); } return; @@ -2007,6 +2030,7 @@ GCSPRINTF("/local/domain/%d/image/device-model-pid", domid)); } +/* Return 0 if no dm needed, 1 if needed and <0 if error. 
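The XSA-180 mitigation above is a pure environment-variable default: honour an administrator-provided XEN_QEMU_CONSOLE_LIMIT, otherwise cap QEMU's stderr logging at 1 MiB. The same respect-existing-setting pattern in miniature (a POSIX sketch, not libxl code):

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative only: default XEN_QEMU_CONSOLE_LIMIT to 1 MiB unless
     * already set (third setenv argument 0 = do not overwrite). */
    int main(void)
    {
        if (!getenv("XEN_QEMU_CONSOLE_LIMIT"))
            setenv("XEN_QEMU_CONSOLE_LIMIT", "1048576", 0);

        printf("XEN_QEMU_CONSOLE_LIMIT=%s\n",
               getenv("XEN_QEMU_CONSOLE_LIMIT"));
        return 0;
    }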
*/ int libxl__need_xenpv_qemu(libxl__gc *gc, int nr_consoles, libxl__device_console *consoles, int nr_vfbs, libxl_device_vfb *vfbs, diff -Nru xen-4.6.0/tools/libxl/libxl_dom.c xen-4.6.5/tools/libxl/libxl_dom.c --- xen-4.6.0/tools/libxl/libxl_dom.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_dom.c 2017-03-07 16:19:05.000000000 +0000 @@ -522,7 +522,8 @@ ents[0] = "memory/static-max"; ents[1] = GCSPRINTF("%"PRId64, info->max_memkb); ents[2] = "memory/target"; - ents[3] = GCSPRINTF("%"PRId64, info->target_memkb - info->video_memkb); + ents[3] = GCSPRINTF("%"PRId64, info->target_memkb - + libxl__get_targetmem_fudge(gc, info)); ents[4] = "memory/videoram"; ents[5] = GCSPRINTF("%"PRId64, info->video_memkb); ents[6] = "domid"; @@ -743,9 +744,6 @@ state->store_mfn = xc_dom_p2m_host(dom, dom->xenstore_pfn); } - libxl__file_reference_unmap(&state->pv_kernel); - libxl__file_reference_unmap(&state->pv_ramdisk); - ret = 0; out: xc_dom_release(dom); diff -Nru xen-4.6.0/tools/libxl/libxl.h xen-4.6.5/tools/libxl/libxl.h --- xen-4.6.0/tools/libxl/libxl.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl.h 2017-03-07 16:19:05.000000000 +0000 @@ -1110,7 +1110,7 @@ #if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x040400 -int static inline libxl_domain_create_restore_0x040200( +static inline int libxl_domain_create_restore_0x040200( libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, int restore_fd, const libxl_asyncop_how *ao_how, diff -Nru xen-4.6.0/tools/libxl/libxl_internal.h xen-4.6.5/tools/libxl/libxl_internal.h --- xen-4.6.0/tools/libxl/libxl_internal.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_internal.h 2017-03-07 16:19:05.000000000 +0000 @@ -337,7 +337,7 @@ uint32_t domid; LIBXL_LIST_ENTRY(libxl_evgen_disk_eject) entry; libxl_ev_user user; - char *vdev; + char *vdev, *be_ptr_path; }; _hidden void libxl__evdisable_disk_eject(libxl__gc*, libxl_evgen_disk_eject*); @@ -683,6 +683,8 @@ _hidden char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid); +_hidden int libxl__backendpath_parse_domid(libxl__gc *gc, const char *be_path, + libxl_domid *domid_out); /*----- "checked" xenstore access functions -----*/ /* Each of these functions will check that it succeeded; if it @@ -1152,6 +1154,7 @@ libxl__device *device, char **bents, char **fents, char **ro_fents); _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device); _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device); +_hidden char *libxl__device_libxl_path(libxl__gc *gc, libxl__device *device); _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path, libxl__device *dev); _hidden int libxl__device_destroy(libxl__gc *gc, libxl__device *dev); @@ -3940,6 +3943,21 @@ libxl_uuid_copy(CTX, &dst->uuid, &src->uuid); } +/* Target memory in xenstore is different from what user has + * asked for. The difference is video_memkb + (possible) fudge. + * See libxl_set_memory_target. + */ +static inline +uint64_t libxl__get_targetmem_fudge(libxl__gc *gc, + const libxl_domain_build_info *info) +{ + int64_t mem_target_fudge = (info->type == LIBXL_DOMAIN_TYPE_HVM && + info->max_memkb > info->target_memkb) + ? LIBXL_MAXMEM_CONSTANT : 0; + + return info->video_memkb + mem_target_fudge; +} + /* Macros used to compare device identifier. Returns true if the two * devices have same identifier. 
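A worked example of the libxl__get_targetmem_fudge() helper defined above (sketch only; the 1024 KiB constant is an assumption standing in for LIBXL_MAXMEM_CONSTANT):

    #include <stdint.h>
    #include <stdio.h>

    #define MAXMEM_CONSTANT 1024   /* KiB; assumed LIBXL_MAXMEM_CONSTANT */

    /* Illustrative only: same arithmetic as libxl__get_targetmem_fudge. */
    static uint64_t targetmem_fudge(int is_hvm, int64_t max_memkb,
                                    int64_t target_memkb,
                                    int64_t video_memkb)
    {
        int64_t fudge = (is_hvm && max_memkb > target_memkb)
                        ? MAXMEM_CONSTANT : 0;
        return video_memkb + fudge;
    }

    int main(void)
    {
        /* HVM guest: 2 GiB max, 1 GiB target, 16 MiB video RAM:
         * 16384 + 1024 = 17408 KiB subtracted from memory/target. */
        printf("fudge = %llu KiB\n",
               (unsigned long long)targetmem_fudge(1, 2097152, 1048576,
                                                   16384));
        return 0;
    }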
*/ #define COMPARE_DEVID(a, b) ((a)->devid == (b)->devid) diff -Nru xen-4.6.0/tools/libxl/libxl_uuid.h xen-4.6.5/tools/libxl/libxl_uuid.h --- xen-4.6.0/tools/libxl/libxl_uuid.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/libxl_uuid.h 2017-03-07 16:19:05.000000000 +0000 @@ -61,7 +61,7 @@ void libxl_uuid_copy(libxl_ctx *ctx_opt, libxl_uuid *dst, const libxl_uuid *src); #if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x040500 -void static inline libxl_uuid_copy_0x040400(libxl_uuid *dst, +static inline void libxl_uuid_copy_0x040400(libxl_uuid *dst, const libxl_uuid *src) { libxl_uuid_copy(NULL, dst, src); diff -Nru xen-4.6.0/tools/libxl/xl_cmdimpl.c xen-4.6.5/tools/libxl/xl_cmdimpl.c --- xen-4.6.0/tools/libxl/xl_cmdimpl.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/libxl/xl_cmdimpl.c 2017-03-07 16:19:05.000000000 +0000 @@ -7766,7 +7766,11 @@ n++; } } - if (libxl_set_vcpuonline(ctx, 0, &cpumap)) { + if (libxl_domain_info(ctx, &info, 0)) { + fprintf(stderr, "error on getting info for Domain-0\n"); + goto out; + } + if (info.vcpu_online > n && libxl_set_vcpuonline(ctx, 0, &cpumap)) { fprintf(stderr, "error on removing vcpus for Domain-0\n"); goto out; } @@ -7781,7 +7785,7 @@ fprintf(stderr, "error on getting info for Domain-0\n"); goto out; } - if (info.vcpu_online == n) { + if (info.vcpu_online <= n) { break; } sleep(1); diff -Nru xen-4.6.0/tools/ocaml/libs/xb/xs_ring_stubs.c xen-4.6.5/tools/ocaml/libs/xb/xs_ring_stubs.c --- xen-4.6.0/tools/ocaml/libs/xb/xs_ring_stubs.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/ocaml/libs/xb/xs_ring_stubs.c 2017-03-07 16:19:05.000000000 +0000 @@ -50,7 +50,7 @@ struct xenstore_domain_interface *intf = interface->addr; XENSTORE_RING_IDX cons, prod; /* offsets only */ - int to_read; + int total_data, data; uint32_t connection; cons = *(volatile uint32_t*)&intf->req_cons; @@ -65,19 +65,28 @@ if ((prod - cons) > XENSTORE_RING_SIZE) caml_failwith("bad connection"); - if (prod == cons) { + /* Check for any pending data at all. */ + total_data = prod - cons; + if (total_data == 0) { + /* No pending data at all. */ result = 0; goto exit; } - cons = MASK_XENSTORE_IDX(cons); - prod = MASK_XENSTORE_IDX(prod); - if (prod > cons) - to_read = prod - cons; - else - to_read = XENSTORE_RING_SIZE - cons; - if (to_read < len) - len = to_read; - memcpy(buffer, intf->req + cons, len); + else if (total_data < len) + /* Some data - make a partial read. */ + len = total_data; + + /* Check whether data crosses the end of the ring. */ + data = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if (len < data) + /* Data within the remaining part of the ring. */ + memcpy(buffer, intf->req + MASK_XENSTORE_IDX(cons), len); + else { + /* Data crosses the ring boundary. Read both halves. */ + memcpy(buffer, intf->req + MASK_XENSTORE_IDX(cons), data); + memcpy(buffer + data, intf->req, len - data); + } + xen_mb(); intf->req_cons += len; result = len; @@ -100,7 +109,7 @@ struct xenstore_domain_interface *intf = interface->addr; XENSTORE_RING_IDX cons, prod; - int can_write; + int total_space, space; uint32_t connection; cons = *(volatile uint32_t*)&intf->rsp_cons; @@ -111,17 +120,32 @@ caml_raise_constant(*caml_named_value("Xb.Reconnect")); xen_mb(); - if ( (prod - cons) >= XENSTORE_RING_SIZE ) { + + if ((prod - cons) > XENSTORE_RING_SIZE) + caml_failwith("bad connection"); + + /* Check for space to write the full message. */ + total_space = XENSTORE_RING_SIZE - (prod - cons); + if (total_space == 0) { + /* No space at all - exit having done nothing. 
*/ result = 0; goto exit; } - if (MASK_XENSTORE_IDX(prod) >= MASK_XENSTORE_IDX(cons)) - can_write = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); - else - can_write = MASK_XENSTORE_IDX(cons) - MASK_XENSTORE_IDX(prod); - if (can_write < len) - len = can_write; - memcpy(intf->rsp + MASK_XENSTORE_IDX(prod), buffer, len); + else if (total_space < len) + /* Some space - make a partial write. */ + len = total_space; + + /* Check for space until the ring wraps. */ + space = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if (len < space) + /* Message fits inside the remaining part of the ring. */ + memcpy(intf->rsp + MASK_XENSTORE_IDX(prod), buffer, len); + else { + /* Message wraps around the end of the ring. Write both halves. */ + memcpy(intf->rsp + MASK_XENSTORE_IDX(prod), buffer, space); + memcpy(intf->rsp, buffer + space, len - space); + } + xen_mb(); intf->rsp_prod += len; result = len; diff -Nru xen-4.6.0/tools/ocaml/xenstored/quota.ml xen-4.6.5/tools/ocaml/xenstored/quota.ml --- xen-4.6.0/tools/ocaml/xenstored/quota.ml 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/ocaml/xenstored/quota.ml 2017-03-07 16:19:05.000000000 +0000 @@ -83,6 +83,6 @@ Hashtbl.iter (fun id nb -> set_entry quota id (get_entry quota id + nb)) diff.cur let merge orig_quota mod_quota dest_quota = - Hashtbl.iter (fun id nb -> let diff = nb - (get_entry orig_quota id) in + Hashtbl.iter (fun id nb -> let diff = nb - (try get_entry orig_quota id with Not_found -> 0) in if diff <> 0 then - set_entry dest_quota id ((get_entry dest_quota id) + diff)) mod_quota.cur + set_entry dest_quota id ((try get_entry dest_quota id with Not_found -> 0) + diff)) mod_quota.cur diff -Nru xen-4.6.0/tools/pygrub/src/pygrub xen-4.6.5/tools/pygrub/src/pygrub --- xen-4.6.0/tools/pygrub/src/pygrub 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/pygrub/src/pygrub 2017-03-07 16:19:05.000000000 +0000 @@ -156,6 +156,11 @@ else: part_offs.append(offset) + # We thought we had a DOS partition table, but didn't find any + # actual valid partition entries. This can happen because an MBR + # (e.g. grubs) may contain the same signature. + if not part_offs: part_offs = [0] + return part_offs class GrubLineEditor(curses.textpad.Textbox): @@ -716,14 +721,17 @@ return cfg def format_sxp(kernel, ramdisk, args): - s = "linux (kernel %s)" % kernel + s = "linux (kernel %s)" % repr(kernel) if ramdisk: - s += "(ramdisk %s)" % ramdisk + s += "(ramdisk %s)" % repr(ramdisk) if args: - s += "(args \"%s\")" % args + s += "(args %s)" % repr(args) return s def format_simple(kernel, ramdisk, args, sep): + for check in (kernel, ramdisk, args): + if check is not None and sep in check: + raise RuntimeError, "simple format cannot represent delimiter-containing value" s = ("kernel %s" % kernel) + sep if ramdisk: s += ("ramdisk %s" % ramdisk) + sep diff -Nru xen-4.6.0/tools/python/scripts/convert-legacy-stream xen-4.6.5/tools/python/scripts/convert-legacy-stream --- xen-4.6.0/tools/python/scripts/convert-legacy-stream 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/python/scripts/convert-legacy-stream 2017-03-07 16:19:05.000000000 +0000 @@ -389,8 +389,7 @@ write_page_data(pfns, pages) elif marker == legacy.CHUNK_enable_verify_mode: - # For debugging purposes only. 
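The ring handling rewritten in xs_ring_stubs.c above follows the standard free-running-index pattern: prod - cons is the number of bytes in flight, indices are masked only when touching the buffer, and a transfer that crosses the end of the ring is split into two memcpy() calls. A self-contained sketch of the write side, assuming a power-of-two ring size:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define RING_SIZE 1024u              /* power of two, like XENSTORE_RING_SIZE */
    #define MASK(i)   ((i) & (RING_SIZE - 1))

    /* Illustrative only: mirrors the patched writer's logic. */
    static unsigned ring_write(uint8_t *ring, uint32_t cons, uint32_t *prod,
                               const uint8_t *buf, unsigned len)
    {
        unsigned total_space = RING_SIZE - (*prod - cons);
        unsigned space;

        if (total_space == 0)
            return 0;                    /* ring full: write nothing */
        if (total_space < len)
            len = total_space;           /* partial write            */

        space = RING_SIZE - MASK(*prod); /* bytes before the wrap    */
        if (len <= space) {
            memcpy(ring + MASK(*prod), buf, len);
        } else {                         /* wraps: write both halves */
            memcpy(ring + MASK(*prod), buf, space);
            memcpy(ring, buf + space, len - space);
        }
        /* The real code issues xen_mb() before publishing the index. */
        *prod += len;
        return len;
    }

    int main(void)
    {
        static uint8_t ring[RING_SIZE];
        uint32_t cons = 1016, prod = 1016;  /* near the wrap point */

        printf("wrote %u bytes\n",
               ring_write(ring, cons, &prod,
                          (const uint8_t *)"hello xenstore", 14));
        return 0;
    }

The patched read side is symmetric, working from total_data and advancing req_cons, and this structure is what fixes the old code's failure to copy across the ring boundary in one call.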
Will not be seen in real migration - raise RuntimeError("Unable to convert a debug stream") + info("This is a debug stream") elif marker == legacy.CHUNK_vcpu_info: max_id, = unpack_exact("i") diff -Nru xen-4.6.0/tools/tests/x86_emulator/test_x86_emulator.c xen-4.6.5/tools/tests/x86_emulator/test_x86_emulator.c --- xen-4.6.0/tools/tests/x86_emulator/test_x86_emulator.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/tests/x86_emulator/test_x86_emulator.c 2017-03-07 16:19:05.000000000 +0000 @@ -129,6 +129,22 @@ (ebx & (1U << 5)) != 0; \ }) +static int read_cr( + unsigned int reg, + unsigned long *val, + struct x86_emulate_ctxt *ctxt) +{ + /* Fake just enough state for the emulator's _get_fpu() to be happy. */ + switch ( reg ) + { + case 0: + *val = 0x00000001; /* PE */ + return X86EMUL_OKAY; + } + + return X86EMUL_UNHANDLEABLE; +} + int get_fpu( void (*exception_callback)(void *, struct cpu_user_regs *), void *exception_callback_arg, @@ -160,6 +176,7 @@ .write = write, .cmpxchg = cmpxchg, .cpuid = cpuid, + .read_cr = read_cr, .get_fpu = get_fpu, }; @@ -412,6 +429,24 @@ goto fail; printf("okay\n"); + printf("%-40s", "Testing cmpxchg8b (%edi) [opsize]..."); + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc7; instr[3] = 0x0f; + res[0] = 0x12345678; + res[1] = 0x87654321; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.edi = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (res[0] != 0x12345678) || + (res[1] != 0x87654321) || + (regs.eax != 0x12345678) || + (regs.edx != 0x87654321) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != (unsigned long)&instr[4]) ) + goto fail; + printf("okay\n"); + printf("%-40s", "Testing movsxbd (%%eax),%%ecx..."); instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; regs.eflags = 0x200; @@ -597,6 +632,21 @@ printf("okay\n"); #else printf("skipped\n"); + + printf("%-40s", "Testing cmovz %ecx,%eax..."); + instr[0] = 0x0f; instr[1] = 0x44; instr[2] = 0xc1; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.rax = 0x1111111122222222; + regs.rcx = 0x3333333344444444; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (regs.rax != 0x0000000022222222) || + (regs.rcx != 0x3333333344444444) || + (regs.eflags != 0x200) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + printf("okay\n"); #endif #define decl_insn(which) extern const unsigned char which[], which##_len[] diff -Nru xen-4.6.0/tools/tests/x86_emulator/x86_emulate.c xen-4.6.5/tools/tests/x86_emulator/x86_emulate.c --- xen-4.6.0/tools/tests/x86_emulator/x86_emulate.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/tests/x86_emulator/x86_emulate.c 2017-03-07 16:19:05.000000000 +0000 @@ -8,9 +8,21 @@ typedef bool bool_t; +#define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63)) + #define BUG() abort() #define ASSERT assert +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(cond) ({ _Static_assert(!(cond), "!(" #cond ")"); }) +#define BUILD_BUG_ON_ZERO(cond) \ + sizeof(struct { _Static_assert(!(cond), "!(" #cond ")"); }) +#else +#define BUILD_BUG_ON_ZERO(cond) sizeof(struct { int:-!!(cond); }) +#define BUILD_BUG_ON(cond) ((void)BUILD_BUG_ON_ZERO(cond)) +#endif + #define cpu_has_amd_erratum(nr) 0 #define mark_regs_dirty(r) ((void)(r)) diff -Nru xen-4.6.0/tools/xenstat/libxenstat/Makefile xen-4.6.5/tools/xenstat/libxenstat/Makefile --- xen-4.6.0/tools/xenstat/libxenstat/Makefile 2015-10-05 
14:33:39.000000000 +0000 +++ xen-4.6.5/tools/xenstat/libxenstat/Makefile 2017-03-07 16:19:05.000000000 +0000 @@ -40,6 +40,8 @@ .PHONY: all all: $(LIB) $(SHLIB) $(SHLIB_LINKS) +$(OBJECTS-y): src/_paths.h + $(LIB): $(OBJECTS-y) $(AR) rc $@ $^ $(RANLIB) $@ @@ -135,9 +137,12 @@ .PHONY: clean clean: rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \ - $(BINDINGS) $(BINDINGSRC) $(DEPS) + $(BINDINGS) $(BINDINGSRC) $(DEPS) src/_paths.h .PHONY: distclean distclean: clean -include $(DEPS) + +genpath-target = $(call buildmakevars2header,src/_paths.h) +$(eval $(genpath-target)) diff -Nru xen-4.6.0/tools/xenstat/libxenstat/src/xenstat_qmp.c xen-4.6.5/tools/xenstat/libxenstat/src/xenstat_qmp.c --- xen-4.6.0/tools/xenstat/libxenstat/src/xenstat_qmp.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/tools/xenstat/libxenstat/src/xenstat_qmp.c 2017-03-07 16:19:05.000000000 +0000 @@ -23,6 +23,7 @@ #include #include "xenstat_priv.h" +#include "_paths.h" #ifdef HAVE_YAJL_YAJL_VERSION_H # include @@ -418,7 +419,7 @@ free(val); /* Connect to this VMs QMP socket */ - snprintf(path, sizeof(path), "/var/run/xen/qmp-libxenstat-%i", dominfo[i].domain); + snprintf(path, sizeof(path), XEN_RUN_DIR "/qmp-libxenstat-%i", dominfo[i].domain); if ((qfd = qmp_connect(path)) < 0) { continue; } diff -Nru xen-4.6.0/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c xen-4.6.5/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- xen-4.6.0/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c 2017-03-07 16:19:05.000000000 +0000 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff -Nru xen-4.6.0/xen/arch/arm/arm32/entry.S xen-4.6.5/xen/arch/arm/arm32/entry.S --- xen-4.6.0/xen/arch/arm/arm32/entry.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/arm32/entry.S 2017-03-07 16:19:05.000000000 +0000 @@ -42,6 +42,61 @@ SAVE_BANKED(fiq) SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); + /* + * Start to check pending virtual abort in the gap of Guest -> HYP + * world switch. + * + * Save ELR_hyp to check whether the pending virtual abort exception + * takes place while we are doing this trap exception. + */ + mrs r1, ELR_hyp + + /* + * Force loads and stores to complete before unmasking asynchronous + * aborts and forcing the delivery of the exception. + */ + dsb sy + + /* + * Unmask asynchronous abort bit. If there is a pending asynchronous + * abort, the data_abort exception will happen after A bit is cleared. + */ + cpsie a + + /* + * This is our single instruction exception window. A pending + * asynchronous abort is guaranteed to occur at the earliest when we + * unmask it, and at the latest just after the ISB. + * + * If a pending abort occurs, the program will jump to data_abort + * exception handler, and the ELR_hyp will be set to + * abort_guest_exit_start or abort_guest_exit_end. + */ + .global abort_guest_exit_start +abort_guest_exit_start: + + isb + + .global abort_guest_exit_end +abort_guest_exit_end: + /* Mask CPSR asynchronous abort bit, close the checking window. */ + cpsid a + + /* + * Compare ELR_hyp and the saved value to check whether we are + * returning from a valid exception caused by pending virtual + * abort. 
+ */ + mrs r2, ELR_hyp + cmp r1, r2 + + /* + * Not equal, the pending virtual abort exception took place, the + * initial exception does not have any significance to be handled. + * Exit ASAP. + */ + bne return_from_trap + mov pc, lr #define DEFINE_TRAP_ENTRY(trap) \ diff -Nru xen-4.6.0/xen/arch/arm/arm32/head.S xen-4.6.5/xen/arch/arm/arm32/head.S --- xen-4.6.0/xen/arch/arm/arm32/head.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/arm32/head.S 2017-03-07 16:19:05.000000000 +0000 @@ -173,8 +173,8 @@ beq hyp /* OK, we're boned. */ - PRINT("- Xen must be entered in NS Hyp mode -\r\n" \ - "- Please update the bootloader -\r\n") + PRINT("- Xen must be entered in NS Hyp mode -\r\n") + PRINT("- Please update the bootloader -\r\n") b fail hyp: PRINT("- Xen starting in Hyp mode -\r\n") diff -Nru xen-4.6.0/xen/arch/arm/arm32/traps.c xen-4.6.5/xen/arch/arm/arm32/traps.c --- xen-4.6.0/xen/arch/arm/arm32/traps.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/arm32/traps.c 2017-03-07 16:19:05.000000000 +0000 @@ -63,7 +63,10 @@ asmlinkage void do_trap_data_abort(struct cpu_user_regs *regs) { - do_unexpected_trap("Data Abort", regs); + if ( VABORT_GEN_BY_GUEST(regs) ) + do_trap_guest_error(regs); + else + do_unexpected_trap("Data Abort", regs); } /* diff -Nru xen-4.6.0/xen/arch/arm/arm64/entry.S xen-4.6.5/xen/arch/arm/arm64/entry.S --- xen-4.6.0/xen/arch/arm/arm64/entry.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/arm64/entry.S 2017-03-07 16:19:05.000000000 +0000 @@ -173,6 +173,43 @@ entry hyp=1 invalid BAD_ERROR +hyp_error: + /* + * Only two possibilities: + * 1) Either we come from the exit path, having just unmasked + * PSTATE.A: change the return code to an EL2 fault, and + * carry on, as we're already in a sane state to handle it. + * 2) Or we come from anywhere else, and that's a bug: we panic. + */ + entry hyp=1 + msr daifclr, #2 + + /* + * The ELR_EL2 may be modified by an interrupt, so we have to use the + * saved value in cpu_user_regs to check whether we come from 1) or + * not. + */ + ldr x0, [sp, #UREGS_PC] + adr x1, abort_guest_exit_start + cmp x0, x1 + adr x1, abort_guest_exit_end + ccmp x0, x1, #4, ne + mov x0, sp + mov x1, #BAD_ERROR + + /* + * Not equal, the exception come from 2). It's a bug, we have to + * panic the hypervisor. + */ + b.ne do_bad_mode + + /* + * Otherwise, the exception come from 1). It happened because of + * the guest. Crash this guest. + */ + bl do_trap_guest_error + exit hyp=1 + /* Traps taken in Current EL with SP_ELx */ hyp_sync: entry hyp=1 @@ -189,45 +226,79 @@ guest_sync: entry hyp=0, compat=0 + bl check_pending_vserror + /* + * If x0 is Non-zero, a vSError took place, the initial exception + * doesn't have any significance to be handled. Exit ASAP + */ + cbnz x0, 1f msr daifclr, #2 mov x0, sp bl do_trap_hypervisor +1: exit hyp=0, compat=0 guest_irq: entry hyp=0, compat=0 + bl check_pending_vserror + /* + * If x0 is Non-zero, a vSError took place, the initial exception + * doesn't have any significance to be handled. Exit ASAP + */ + cbnz x0, 1f mov x0, sp bl do_trap_irq +1: exit hyp=0, compat=0 guest_fiq_invalid: entry hyp=0, compat=0 invalid BAD_FIQ -guest_error_invalid: +guest_error: entry hyp=0, compat=0 - invalid BAD_ERROR + msr daifclr, #2 + mov x0, sp + bl do_trap_guest_error + exit hyp=0, compat=0 guest_sync_compat: entry hyp=0, compat=1 + bl check_pending_vserror + /* + * If x0 is Non-zero, a vSError took place, the initial exception + * doesn't have any significance to be handled. 
Exit ASAP + */ + cbnz x0, 1f msr daifclr, #2 mov x0, sp bl do_trap_hypervisor +1: exit hyp=0, compat=1 guest_irq_compat: entry hyp=0, compat=1 + bl check_pending_vserror + /* + * If x0 is Non-zero, a vSError took place, the initial exception + * doesn't have any significance to be handled. Exit ASAP + */ + cbnz x0, 1f mov x0, sp bl do_trap_irq +1: exit hyp=0, compat=1 guest_fiq_invalid_compat: entry hyp=0, compat=1 invalid BAD_FIQ -guest_error_invalid_compat: +guest_error_compat: entry hyp=0, compat=1 - invalid BAD_ERROR + msr daifclr, #2 + mov x0, sp + bl do_trap_guest_error + exit hyp=0, compat=1 ENTRY(return_to_new_vcpu32) exit hyp=0, compat=1 @@ -264,6 +335,62 @@ eret /* + * This function is used to check pending virtual SError in the gap of + * EL1 -> EL2 world switch. + * The x0 register will be used to indicate the results of detection. + * x0 -- Non-zero indicates a pending virtual SError took place. + * x0 -- Zero indicates no pending virtual SError took place. + */ +check_pending_vserror: + /* + * Save elr_el2 to check whether the pending SError exception takes + * place while we are doing this sync exception. + */ + mrs x0, elr_el2 + + /* Synchronize against in-flight ld/st */ + dsb sy + + /* + * Unmask PSTATE asynchronous abort bit. If there is a pending + * SError, the EL2 error exception will happen after PSTATE.A + * is cleared. + */ + msr daifclr, #4 + + /* + * This is our single instruction exception window. A pending + * SError is guaranteed to occur at the earliest when we unmask + * it, and at the latest just after the ISB. + * + * If a pending SError occurs, the program will jump to EL2 error + * exception handler, and the elr_el2 will be set to + * abort_guest_exit_start or abort_guest_exit_end. + */ +abort_guest_exit_start: + + isb + +abort_guest_exit_end: + /* Mask PSTATE asynchronous abort bit, close the checking window. */ + msr daifset, #4 + + /* + * Compare elr_el2 and the saved value to check whether we are + * returning from a valid exception caused by pending SError. + */ + mrs x1, elr_el2 + cmp x0, x1 + + /* + * Not equal, the pending SError exception took place, set + * x0 to non-zero. + */ + cset x0, ne + + ret + +/* * Exception vectors. */ .macro ventry label @@ -281,17 +408,17 @@ ventry hyp_sync // Synchronous EL2h ventry hyp_irq // IRQ EL2h ventry hyp_fiq_invalid // FIQ EL2h - ventry hyp_error_invalid // Error EL2h + ventry hyp_error // Error EL2h ventry guest_sync // Synchronous 64-bit EL0/EL1 ventry guest_irq // IRQ 64-bit EL0/EL1 ventry guest_fiq_invalid // FIQ 64-bit EL0/EL1 - ventry guest_error_invalid // Error 64-bit EL0/EL1 + ventry guest_error // Error 64-bit EL0/EL1 ventry guest_sync_compat // Synchronous 32-bit EL0/EL1 ventry guest_irq_compat // IRQ 32-bit EL0/EL1 ventry guest_fiq_invalid_compat // FIQ 32-bit EL0/EL1 - ventry guest_error_invalid_compat // Error 32-bit EL0/EL1 + ventry guest_error_compat // Error 32-bit EL0/EL1 /* * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next) diff -Nru xen-4.6.0/xen/arch/arm/arm64/head.S xen-4.6.5/xen/arch/arm/arm64/head.S --- xen-4.6.0/xen/arch/arm/arm64/head.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/arm64/head.S 2017-03-07 16:19:05.000000000 +0000 @@ -309,8 +309,8 @@ b.eq el2 /* Yes */ /* OK, we're boned. 
*/ - PRINT("- Xen must be entered in NS EL2 mode -\r\n" \ - "- Please update the bootloader -\r\n") + PRINT("- Xen must be entered in NS EL2 mode -\r\n") + PRINT("- Please update the bootloader -\r\n") b fail el2: PRINT("- Xen starting at EL2 -\r\n") @@ -342,8 +342,8 @@ * Top byte is used * PT walks use Inner-Shareable accesses, * PT walks are write-back, write-allocate in both cache levels, - * Full 64-bit address space goes through this table. */ - ldr x0, =(TCR_RES1|TCR_SH0_IS|TCR_ORGN0_WBWA|TCR_IRGN0_WBWA|TCR_T0SZ(0)) + * 48-bit virtual address space goes through this table. */ + ldr x0, =(TCR_RES1|TCR_SH0_IS|TCR_ORGN0_WBWA|TCR_IRGN0_WBWA|TCR_T0SZ(64-48)) /* ID_AA64MMFR0_EL1[3:0] (PARange) corresponds to TCR_EL2[18:16] (PS) */ mrs x1, ID_AA64MMFR0_EL1 bfi x0, x1, #16, #3 @@ -361,6 +361,11 @@ ldr x0, =(HSCTLR_BASE) msr SCTLR_EL2, x0 + /* Ensure that any exceptions encountered at EL2 + * are handled using the EL2 stack pointer, rather + * than SP_EL0. */ + msr spsel, #1 + /* Rebuild the boot pagetable's first-level entries. The structure * is described in mm.c. * diff -Nru xen-4.6.0/xen/arch/arm/domain.c xen-4.6.5/xen/arch/arm/domain.c --- xen-4.6.0/xen/arch/arm/domain.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/domain.c 2017-03-07 16:19:05.000000000 +0000 @@ -344,8 +344,6 @@ if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) { - BUG(); /* XXX multicalls not implemented yet. */ - __set_bit(_MCSF_call_preempted, &mcs->flags); for ( i = 0; *p != '\0'; i++ ) @@ -770,8 +768,15 @@ { /* Grab a reference to the page so it won't disappear from under us. */ if ( unlikely(!get_page(page, d)) ) - /* Couldn't get a reference -- someone is freeing this page. */ - BUG(); + /* + * Couldn't get a reference -- someone is freeing this page and + * has already committed to doing so, so no more to do here. + * + * Note that the page must be left on the list, a list_del + * here will clash with the list_del done by the other + * party in the race and corrupt the list head. 
+ */ + continue; if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); diff -Nru xen-4.6.0/xen/arch/arm/gic.c xen-4.6.5/xen/arch/arm/gic.c --- xen-4.6.0/xen/arch/arm/gic.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/gic.c 2017-03-07 16:19:05.000000000 +0000 @@ -195,7 +195,10 @@ */ if ( test_bit(_IRQ_INPROGRESS, &desc->status) || !test_bit(_IRQ_DISABLED, &desc->status) ) + { + vgic_unlock_rank(v_target, rank, flags); return -EBUSY; + } } clear_bit(_IRQ_GUEST, &desc->status); diff -Nru xen-4.6.0/xen/arch/arm/gic-v3.c xen-4.6.5/xen/arch/arm/gic-v3.c --- xen-4.6.0/xen/arch/arm/gic-v3.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/gic-v3.c 2017-03-07 16:19:05.000000000 +0000 @@ -455,7 +455,11 @@ static unsigned int gicv3_read_irq(void) { - return READ_SYSREG32(ICC_IAR1_EL1); + unsigned int irq = READ_SYSREG32(ICC_IAR1_EL1); + + dsb(sy); + + return irq; } static inline uint64_t gicv3_mpidr_to_affinity(int cpu) diff -Nru xen-4.6.0/xen/arch/arm/guestcopy.c xen-4.6.5/xen/arch/arm/guestcopy.c --- xen-4.6.0/xen/arch/arm/guestcopy.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/guestcopy.c 2017-03-07 16:19:05.000000000 +0000 @@ -17,7 +17,7 @@ unsigned size = min(len, (unsigned)PAGE_SIZE - offset); struct page_info *page; - page = get_page_from_gva(current->domain, (vaddr_t) to, GV2M_WRITE); + page = get_page_from_gva(current, (vaddr_t) to, GV2M_WRITE); if ( page == NULL ) return len; @@ -64,7 +64,7 @@ unsigned size = min(len, (unsigned)PAGE_SIZE - offset); struct page_info *page; - page = get_page_from_gva(current->domain, (vaddr_t) to, GV2M_WRITE); + page = get_page_from_gva(current, (vaddr_t) to, GV2M_WRITE); if ( page == NULL ) return len; @@ -96,7 +96,7 @@ unsigned size = min(len, (unsigned)(PAGE_SIZE - offset)); struct page_info *page; - page = get_page_from_gva(current->domain, (vaddr_t) from, GV2M_READ); + page = get_page_from_gva(current, (vaddr_t) from, GV2M_READ); if ( page == NULL ) return len; diff -Nru xen-4.6.0/xen/arch/arm/hvm.c xen-4.6.5/xen/arch/arm/hvm.c --- xen-4.6.0/xen/arch/arm/hvm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/hvm.c 2017-03-07 16:19:05.000000000 +0000 @@ -57,7 +57,7 @@ default: { - printk("%s: Bad HVM op %ld.\n", __func__, op); + gdprintk(XENLOG_DEBUG, "HVMOP op=%lu: not implemented\n", op); rc = -ENOSYS; break; } diff -Nru xen-4.6.0/xen/arch/arm/mm.c xen-4.6.5/xen/arch/arm/mm.c --- xen-4.6.0/xen/arch/arm/mm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/mm.c 2017-03-07 16:19:05.000000000 +0000 @@ -724,6 +724,8 @@ else { unsigned long first_mfn = alloc_boot_pages(1, 1); + + clear_page(mfn_to_virt(first_mfn)); pte = mfn_to_xen_entry(first_mfn, WRITEALLOC); pte.pt.table = 1; write_pte(p, pte); @@ -767,6 +769,7 @@ second = mfn_to_virt(second_base); for ( i = 0; i < nr_second; i++ ) { + clear_page(mfn_to_virt(second_base + i)); pte = mfn_to_xen_entry(second_base + i, WRITEALLOC); pte.pt.table = 1; write_pte(&xen_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte); diff -Nru xen-4.6.0/xen/arch/arm/p2m.c xen-4.6.5/xen/arch/arm/p2m.c --- xen-4.6.0/xen/arch/arm/p2m.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/p2m.c 2017-03-07 16:19:05.000000000 +0000 @@ -1105,29 +1105,26 @@ out: + for ( level = P2M_ROOT_LEVEL; level < 4; level ++ ) + { + if ( mappings[level] ) + unmap_domain_page(mappings[level]); + } + + spin_unlock(&p2m->lock); + if ( rc < 0 && ( op == INSERT || op == ALLOCATE ) && addr != start_gpaddr ) { BUG_ON(addr == end_gpaddr); 
/* - * addr keeps the address of the last successfully-inserted mapping, - * while apply_p2m_changes() considers an address range which is - * exclusive of end_gpaddr: add level_size to addr to obtain the - * right end of the range + * addr keeps the address of the end of the last successfully-inserted + * mapping. */ - apply_p2m_changes(d, REMOVE, - start_gpaddr, addr + level_sizes[level], orig_maddr, + apply_p2m_changes(d, REMOVE, start_gpaddr, addr, orig_maddr, mattr, 0, p2m_invalid, d->arch.p2m.default_access); } - for ( level = P2M_ROOT_LEVEL; level < 4; level ++ ) - { - if ( mappings[level] ) - unmap_domain_page(mappings[level]); - } - - spin_unlock(&p2m->lock); - return rc; } @@ -1285,7 +1282,8 @@ while ( (pg = page_list_remove_head(&p2m->pages)) ) free_domheap_page(pg); - free_domheap_pages(p2m->root, P2M_ROOT_ORDER); + if ( p2m->root ) + free_domheap_pages(p2m->root, P2M_ROOT_ORDER); p2m->root = NULL; @@ -1458,32 +1456,25 @@ return page; } -struct page_info *get_page_from_gva(struct domain *d, vaddr_t va, +struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va, unsigned long flags) { + struct domain *d = v->domain; struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page = NULL; paddr_t maddr = 0; int rc; - spin_lock(&p2m->lock); - - if ( unlikely(d != current->domain) ) - { - unsigned long irq_flags; - - local_irq_save(irq_flags); - p2m_load_VTTBR(d); + /* + * XXX: To support a different vCPU, we would need to load the + * VTTBR_EL2, TTBR0_EL1, TTBR1_EL1 and SCTLR_EL1 + */ + if ( v != current ) + return NULL; - rc = gvirt_to_maddr(va, &maddr, flags); + spin_lock(&p2m->lock); - p2m_load_VTTBR(current->domain); - local_irq_restore(irq_flags); - } - else - { - rc = gvirt_to_maddr(va, &maddr, flags); - } + rc = gvirt_to_maddr(va, &maddr, flags); if ( rc ) goto err; diff -Nru xen-4.6.0/xen/arch/arm/physdev.c xen-4.6.5/xen/arch/arm/physdev.c --- xen-4.6.0/xen/arch/arm/physdev.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/physdev.c 2017-03-07 16:19:05.000000000 +0000 @@ -8,12 +8,13 @@ #include #include #include +#include #include int do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) { - printk("%s %d cmd=%d: not implemented yet\n", __func__, __LINE__, cmd); + gdprintk(XENLOG_DEBUG, "PHYSDEVOP cmd=%d: not implemented\n", cmd); return -ENOSYS; } diff -Nru xen-4.6.0/xen/arch/arm/traps.c xen-4.6.5/xen/arch/arm/traps.c --- xen-4.6.0/xen/arch/arm/traps.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/traps.c 2017-03-07 16:19:05.000000000 +0000 @@ -124,7 +124,8 @@ /* Setup hypervisor traps */ WRITE_SYSREG(HCR_PTW|HCR_BSU_INNER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM| - HCR_TWE|HCR_TWI|HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP, HCR_EL2); + HCR_TWE|HCR_TWI|HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB, + HCR_EL2); isb(); } @@ -916,7 +917,7 @@ return; } - page = get_page_from_gva(v->domain, sp, GV2M_READ); + page = get_page_from_gva(v, sp, GV2M_READ); if ( page == NULL ) { printk("Failed to convert stack to physical address\n"); @@ -2319,12 +2320,28 @@ if (first) unmap_domain_page(first); } +static inline paddr_t get_faulting_ipa(void) +{ + register_t hpfar = READ_SYSREG(HPFAR_EL2); + + return ((paddr_t)(hpfar & HPFAR_MASK) << (12 - 4)); +} + static void do_trap_instr_abort_guest(struct cpu_user_regs *regs, const union hsr hsr) { int rc; register_t gva = READ_SYSREG(FAR_EL2); + /* + * If this bit has been set, it means that this instruction abort is caused + * by a guest external abort. Currently we crash the guest to protect the + * hypervisor. 
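A worked decoding of the get_faulting_ipa() helper introduced above: HPFAR_EL2 reports bits [47:12] of the faulting IPA in register bits [39:4], so the IPA is recovered with a left shift of 12 - 4 = 8. Sketch with a hypothetical fault address; the mask value is an assumption matching bits [39:4]:

    #include <stdint.h>
    #include <stdio.h>

    #define HPFAR_MASK 0xFFFFFFFFF0ULL   /* bits [39:4]; assumed value */

    /* Illustrative only: same reconstruction as get_faulting_ipa(). */
    static uint64_t faulting_ipa(uint64_t hpfar)
    {
        return (hpfar & HPFAR_MASK) << (12 - 4);
    }

    int main(void)
    {
        /* Hypothetical page-aligned fault at IPA 0x8badf000:
         * HPFAR_EL2 would hold that address shifted right by 8. */
        uint64_t hpfar = 0x8badf000ULL >> 8;

        printf("IPA = %#llx\n", (unsigned long long)faulting_ipa(hpfar));
        return 0;
    }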
In future one can better handle this by injecting a virtual + * abort to the guest. + */ + if ( hsr.iabt.eat ) + domain_crash_synchronous(); + switch ( hsr.iabt.ifsc & 0x3f ) { case FSC_FLT_PERM ... FSC_FLT_PERM + 3: @@ -2337,7 +2354,7 @@ }; if ( hsr.iabt.s1ptw ) - gpa = READ_SYSREG(HPFAR_EL2); + gpa = get_faulting_ipa(); else { /* @@ -2379,6 +2396,15 @@ return; } + /* + * If this bit has been set, it means that this data abort is caused + * by a guest external abort. Currently we crash the guest to protect the + * hypervisor. In future one can better handle this by injecting a virtual + * abort to the guest. + */ + if ( dabt.eat ) + domain_crash_synchronous(); + info.dabt = dabt; #ifdef CONFIG_ARM_32 info.gva = READ_CP32(HDFAR); @@ -2387,7 +2413,7 @@ #endif if ( dabt.s1ptw ) - info.gpa = READ_SYSREG(HPFAR_EL2); + info.gpa = get_faulting_ipa(); else { rc = gva_to_ipa(info.gva, &info.gpa, GV2M_READ); @@ -2587,6 +2613,21 @@ } } +asmlinkage void do_trap_guest_error(struct cpu_user_regs *regs) +{ + enter_hypervisor_head(regs); + + /* + * Currently, to ensure hypervisor safety, when we received a + * guest-generated vSerror/vAbort, we just crash the guest to protect + * the hypervisor. In future we can better handle this by injecting + * a vSerror/vAbort to the guest. + */ + gdprintk(XENLOG_WARNING, "Guest(Dom-%u) will be crashed by vSError\n", + current->domain->domain_id); + domain_crash_synchronous(); +} + asmlinkage void do_trap_irq(struct cpu_user_regs *regs) { enter_hypervisor_head(regs); diff -Nru xen-4.6.0/xen/arch/arm/vgic-v2.c xen-4.6.5/xen/arch/arm/vgic-v2.c --- xen-4.6.0/xen/arch/arm/vgic-v2.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/vgic-v2.c 2017-03-07 16:19:05.000000000 +0000 @@ -335,17 +335,16 @@ return 0; case GICD_ICACTIVER ... GICD_ICACTIVERN: - if ( dabt.size != DABT_WORD ) goto bad_width; printk(XENLOG_G_ERR "%pv: vGICD: unhandled word write %#"PRIregister" to ICACTIVER%d\n", v, *r, gicd_reg - GICD_ICACTIVER); - return 0; + goto write_ignore_32; - case GICD_ITARGETSR ... GICD_ITARGETSR + 7: + case GICD_ITARGETSR ... GICD_ITARGETSR7: /* SGI/PPI target is read only */ goto write_ignore_32; - case GICD_ITARGETSR + 8 ... GICD_ITARGETSRN: + case GICD_ITARGETSR8 ... GICD_ITARGETSRN: { /* unsigned long needed for find_next_bit */ unsigned long target; diff -Nru xen-4.6.0/xen/arch/arm/vgic-v3.c xen-4.6.5/xen/arch/arm/vgic-v3.c --- xen-4.6.0/xen/arch/arm/vgic-v3.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/arm/vgic-v3.c 2017-03-07 16:19:05.000000000 +0000 @@ -427,11 +427,10 @@ return 0; case GICD_ICACTIVER ... GICD_ICACTIVERN: - if ( dabt.size != DABT_WORD ) goto bad_width; printk(XENLOG_G_ERR "%pv: %s: unhandled word write %#"PRIregister" to ICACTIVER%d\n", v, name, *r, reg - GICD_ICACTIVER); - return 0; + goto write_ignore_32; case GICD_IPRIORITYR ... GICD_IPRIORITYRN: if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto bad_width; diff -Nru xen-4.6.0/xen/arch/x86/alternative.c xen-4.6.5/xen/arch/x86/alternative.c --- xen-4.6.0/xen/arch/x86/alternative.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/alternative.c 2017-03-07 16:19:05.000000000 +0000 @@ -174,7 +174,7 @@ memcpy(insnbuf, replacement, a->replacementlen); /* 0xe8/0xe9 are relative branches; fix the offset. 
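For background on the alternative.c fixup nearby (comment above, guarded test just below): 0xe8/0xe9 encode call/jmp with a 32-bit displacement relative to the end of the instruction, so when the 5-byte instruction is copied to a new address the displacement must grow by the distance moved, exactly as the `replacement - instr` adjustment does. A standalone sketch with illustrative buffers, assuming little-endian x86:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative only: relocate a 5-byte rel32 branch copied from
     * `repl` to `instr`, as the guarded fixup in alternative.c does. */
    static uint8_t repl[5]  = { 0xe9, 0x10, 0x00, 0x00, 0x00 }; /* jmp +0x10 */
    static uint8_t instr[5];

    int main(void)
    {
        int32_t disp;

        memcpy(instr, repl, sizeof(repl));        /* copy the branch   */
        memcpy(&disp, instr + 1, sizeof(disp));   /* little-endian x86 */
        disp += (int32_t)((uintptr_t)repl - (uintptr_t)instr);
        memcpy(instr + 1, &disp, sizeof(disp));   /* fix the offset    */

        /* The branch target (end of insn + disp) is unchanged: */
        printf("target preserved: %s\n",
               (uintptr_t)instr + 5 + disp == (uintptr_t)repl + 5 + 0x10 ?
               "yes" : "no");
        return 0;
    }

The bug being fixed is that the old condition looked at the opcode before checking the length, so a shorter replacement whose first byte happened to be 0xe8/0xe9 could have garbage "fixed up" past its end.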
*/ - if ( (*insnbuf & 0xfe) == 0xe8 && a->replacementlen == 5 ) + if ( a->replacementlen >= 5 && (*insnbuf & 0xfe) == 0xe8 ) *(s32 *)(insnbuf + 1) += replacement - instr; add_nops(insnbuf + a->replacementlen, diff -Nru xen-4.6.0/xen/arch/x86/apic.c xen-4.6.5/xen/arch/x86/apic.c --- xen-4.6.0/xen/arch/x86/apic.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/apic.c 2017-03-07 16:19:05.000000000 +0000 @@ -943,8 +943,18 @@ mask_8259A(); mask_IO_APIC_setup(ioapic_entries); - if ( iommu_enable_x2apic_IR() ) + switch ( iommu_enable_x2apic_IR() ) { + case 0: + break; + case -ENXIO: /* ACPI_DMAR_X2APIC_OPT_OUT set */ + if ( !x2apic_enabled ) + { + printk("Not enabling x2APIC (upon firmware request)\n"); + goto restore_out; + } + /* fall through */ + default: if ( x2apic_enabled ) panic("Interrupt remapping could not be enabled while " "x2APIC is already enabled by BIOS"); diff -Nru xen-4.6.0/xen/arch/x86/boot/build32.mk xen-4.6.5/xen/arch/x86/boot/build32.mk --- xen-4.6.0/xen/arch/x86/boot/build32.mk 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/boot/build32.mk 2017-03-07 16:19:05.000000000 +0000 @@ -16,11 +16,7 @@ $(OBJCOPY) -O binary $< $@ %.lnk: %.o - $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0 -o $@ $< - -%.o: %.c - $(CC) $(CFLAGS) -c -fpic $< -o $@ - $(OBJDUMP) -h $@ | sed -n '/[0-9]/{s,00*,0,g;p;}' |\ + $(OBJDUMP) -h $< | sed -n '/[0-9]/{s,00*,0,g;p;}' |\ while read idx name sz rest; do \ case "$$name" in \ .data|.data.*|.rodata|.rodata.*|.bss|.bss.*) \ @@ -29,6 +25,10 @@ exit $$(expr $$idx + 1);; \ esac; \ done + $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0 -o $@ $< + +%.o: %.c + $(CC) $(CFLAGS) -c -fpic $< -o $@ reloc.o: reloc.c $(RELOC_DEPS) diff -Nru xen-4.6.0/xen/arch/x86/cpu/amd.c xen-4.6.5/xen/arch/x86/cpu/amd.c --- xen-4.6.0/xen/arch/x86/cpu/amd.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/amd.c 2017-03-07 16:19:05.000000000 +0000 @@ -563,6 +563,18 @@ smp_processor_id()); wrmsrl(MSR_AMD64_LS_CFG, value | (1 << 15)); } + } else if (c->x86 == 0x12) { + rdmsrl(MSR_AMD64_DE_CFG, value); + if (!(value & (1U << 31))) { + static bool_t warned; + + if (c == &boot_cpu_data || opt_cpu_info || + !test_and_set_bool(warned)) + printk(KERN_WARNING + "CPU%u: Applying workaround for erratum 665\n", + smp_processor_id()); + wrmsrl(MSR_AMD64_DE_CFG, value | (1U << 31)); + } } /* AMD CPUs do not support SYSENTER outside of legacy mode. */ diff -Nru xen-4.6.0/xen/arch/x86/cpu/common.c xen-4.6.5/xen/arch/x86/cpu/common.c --- xen-4.6.0/xen/arch/x86/cpu/common.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/common.c 2017-03-07 16:19:05.000000000 +0000 @@ -38,6 +38,7 @@ const struct cpu_dev *__read_mostly cpu_devs[X86_VENDOR_NUM] = {}; unsigned int paddr_bits __read_mostly = 36; +unsigned int hap_paddr_bits __read_mostly = 36; /* * Default host IA32_CR_PAT value to cover all memory types. @@ -211,7 +212,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - u32 tfms, capability, excap, ebx; + u32 tfms, capability, excap, ebx, eax; /* Get vendor name */ cpuid(0x00000000, &c->cpuid_level, @@ -248,8 +249,11 @@ } if ( c->extended_cpuid_level >= 0x80000004 ) get_model_name(c); /* Default name */ - if ( c->extended_cpuid_level >= 0x80000008 ) - paddr_bits = cpuid_eax(0x80000008) & 0xff; + if ( c->extended_cpuid_level >= 0x80000008 ) { + eax = cpuid_eax(0x80000008); + paddr_bits = eax & 0xff; + hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits; + } } /* Might lift BIOS max_leaf=3 limit. 
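On the generic_identify() hunk above: CPUID leaf 0x80000008 reports the host physical address width in EAX[7:0] and, on CPUs with nested paging, the guest physical address width in EAX[23:16]; a value of zero in the latter field means "same as the host width", which is what the ?: fallback implements. A sketch of the decode:

    uint32_t eax = cpuid_eax(0x80000008);
    unsigned int paddr_bits = eax & 0xff;            /* EAX[7:0]   */
    unsigned int hap_bits   = (eax >> 16) & 0xff;    /* EAX[23:16] */

    if ( hap_bits == 0 )          /* 0 => same as host width */
        hap_bits = paddr_bits;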
*/ diff -Nru xen-4.6.0/xen/arch/x86/cpu/mcheck/non-fatal.c xen-4.6.5/xen/arch/x86/cpu/mcheck/non-fatal.c --- xen-4.6.0/xen/arch/x86/cpu/mcheck/non-fatal.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/mcheck/non-fatal.c 2017-03-07 16:19:05.000000000 +0000 @@ -94,8 +94,8 @@ if (mce_disabled || !mce_available(c)) return -ENODEV; - if ( __get_cpu_var(poll_bankmask) == NULL ) - return -EINVAL; + if (__get_cpu_var(poll_bankmask) == NULL) + return -EINVAL; /* * Check for non-fatal errors every MCE_RATE s diff -Nru xen-4.6.0/xen/arch/x86/cpu/mtrr/generic.c xen-4.6.5/xen/arch/x86/cpu/mtrr/generic.c --- xen-4.6.0/xen/arch/x86/cpu/mtrr/generic.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/mtrr/generic.c 2017-03-07 16:19:05.000000000 +0000 @@ -220,7 +220,7 @@ /* Doesn't attempt to pass an error out to MTRR users because it's quite complicated in some cases and probably not worth it because the best error handling is to ignore it. */ -void mtrr_wrmsr(unsigned int msr, uint64_t msr_content) +static void mtrr_wrmsr(unsigned int msr, uint64_t msr_content) { if (wrmsr_safe(msr, msr_content) < 0) printk(KERN_ERR @@ -495,8 +495,8 @@ if (size == 0) { /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. */ + memset(vr, 0, sizeof(*vr)); mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), 0); - memset(vr, 0, sizeof(struct mtrr_var_range)); } else { uint32_t base_lo, base_hi, mask_lo, mask_hi; diff -Nru xen-4.6.0/xen/arch/x86/cpu/mtrr/mtrr.h xen-4.6.5/xen/arch/x86/cpu/mtrr/mtrr.h --- xen-4.6.0/xen/arch/x86/cpu/mtrr/mtrr.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/mtrr/mtrr.h 2017-03-07 16:19:05.000000000 +0000 @@ -63,7 +63,6 @@ extern unsigned int num_var_ranges; void mtrr_state_warn(void); -void mtrr_wrmsr(unsigned int msr, uint64_t msr_content); extern int amd_init_mtrr(void); extern int cyrix_init_mtrr(void); diff -Nru xen-4.6.0/xen/arch/x86/cpu/vpmu.c xen-4.6.5/xen/arch/x86/cpu/vpmu.c --- xen-4.6.0/xen/arch/x86/cpu/vpmu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/vpmu.c 2017-03-07 16:19:05.000000000 +0000 @@ -140,7 +140,7 @@ return ret; nop: - if ( !is_write ) + if ( !is_write && (msr != MSR_IA32_MISC_ENABLE) ) *msr_content = 0; return 0; @@ -480,6 +480,8 @@ return; /* Don't bother restoring vpmu_count, VPMU is off forever */ } + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED; + if ( ret ) printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v); @@ -682,8 +684,8 @@ vpmu_mode = pmu_params.val; else if ( vpmu_mode != pmu_params.val ) { - printk(XENLOG_WARNING - "VPMU: Cannot change mode while active VPMUs exist\n"); + gprintk(XENLOG_WARNING, + "VPMU: Cannot change mode while active VPMUs exist\n"); ret = -EBUSY; } @@ -714,8 +716,8 @@ vpmu_features = pmu_params.val; else { - printk(XENLOG_WARNING "VPMU: Cannot change features while" - " active VPMUs exist\n"); + gprintk(XENLOG_WARNING, + "VPMU: Cannot change features while active VPMUs exist\n"); ret = -EBUSY; } diff -Nru xen-4.6.0/xen/arch/x86/cpu/vpmu_intel.c xen-4.6.5/xen/arch/x86/cpu/vpmu_intel.c --- xen-4.6.0/xen/arch/x86/cpu/vpmu_intel.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/cpu/vpmu_intel.c 2017-03-07 16:19:05.000000000 +0000 @@ -87,7 +87,7 @@ /* Masks used for testing whether and MSR is valid */ #define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21)) static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask; -static uint64_t __read_mostly global_ovf_ctrl_mask; 
+static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask; /* Total size of PMU registers block (copied to/from PV(H) guest) */ static unsigned int __read_mostly regs_sz; @@ -166,10 +166,9 @@ */ static int core2_get_fixed_pmc_count(void) { - u32 eax; + u32 edx = cpuid_edx(0xa); - eax = cpuid_eax(0xa); - return MASK_EXTR(eax, PMU_FIXED_NR_MASK); + return MASK_EXTR(edx, PMU_FIXED_NR_MASK); } /* edx bits 5-12: Bit width of fixed-function performance counters */ @@ -265,7 +264,6 @@ clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); } @@ -297,7 +295,6 @@ set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); } @@ -368,7 +365,6 @@ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl); wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area); - wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable); if ( !has_hvm_container_vcpu(v) ) { @@ -392,6 +388,10 @@ if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask ) return -EINVAL; + if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask ) + return -EINVAL; + if ( core2_vpmu_cxt->pebs_enable ) + return -EINVAL; fixed_ctrl = core2_vpmu_cxt->fixed_ctrl; if ( fixed_ctrl & fixed_ctrl_mask ) @@ -602,10 +602,9 @@ "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); return -EINVAL; case MSR_IA32_PEBS_ENABLE: - if ( msr_content & 1 ) - gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " - "which is not supported.\n"); - core2_vpmu_cxt->pebs_enable = msr_content; + if ( msr_content ) + /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */ + return -EINVAL; return 0; case MSR_IA32_DS_AREA: if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) @@ -623,6 +622,8 @@ gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); return 0; case MSR_CORE_PERF_GLOBAL_CTRL: + if ( msr_content & global_ctrl_mask ) + return -EINVAL; core2_vpmu_cxt->global_ctrl = msr_content; break; case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -725,6 +726,7 @@ /* Extension for BTS */ if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; + *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL; } return 0; @@ -734,11 +736,11 @@ unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - if (input == 0x1) + switch ( input ) { - struct vpmu_struct *vpmu = vcpu_vpmu(current); + case 0x1: - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + if ( vpmu_is_set(vcpu_vpmu(current), VPMU_CPU_HAS_DS) ) { /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ *edx |= cpufeat_mask(X86_FEATURE_DS); @@ -747,6 +749,13 @@ if ( cpu_has(¤t_cpu_data, X86_FEATURE_DSCPL) ) *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); } + break; + + case 0xa: + /* Report at most version 3 since that's all we currently emulate */ + if ( MASK_EXTR(*eax, PMU_VERSION_MASK) > 3 ) + *eax = (*eax & ~PMU_VERSION_MASK) | MASK_INSR(3, PMU_VERSION_MASK); + break; } } @@ -809,7 +818,7 @@ if ( is_pmc_quirk ) handle_pmc_quirk(msr_content); core2_vpmu_cxt->global_status |= msr_content; - msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); + msr_content &= ~global_ovf_ctrl_mask; wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); } else @@ -956,59 +965,32 @@ int __init 
core2_vpmu_init(void) { u64 caps; + unsigned int version = 0; - if ( current_cpu_data.x86 != 6 ) - { - printk(XENLOG_WARNING "VPMU: only family 6 is supported\n"); - return -EINVAL; - } + if ( current_cpu_data.cpuid_level >= 0xa ) + version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK); - switch ( current_cpu_data.x86_model ) + switch ( version ) { - /* Core2: */ - case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x1d: /* six-core 45 nm xeon "Dunnington" */ - - case 0x2a: /* SandyBridge */ - case 0x2d: /* SandyBridge, "Romley-EP" */ - - /* Nehalem: */ - case 0x1a: /* 45 nm nehalem, "Bloomfield" */ - case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ - case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ - - /* Westmere: */ - case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ - case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ - case 0x2f: /* 32 nm Westmere-EX */ - - case 0x3a: /* IvyBridge */ - case 0x3e: /* IvyBridge EP */ - - /* Haswell: */ - case 0x3c: - case 0x3f: - case 0x45: - case 0x46: - - /* Broadwell */ - case 0x3d: - case 0x4f: - case 0x56: + case 4: + printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. " + "Emulating version 3\n"); + /* FALLTHROUGH */ - /* future: */ - case 0x4e: + case 2: + case 3: + break; - /* next gen Xeon Phi */ - case 0x57: - break; + default: + printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n", + version); + return -EINVAL; + } - default: - printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n", - current_cpu_data.x86_model); - return -EINVAL; + if ( current_cpu_data.x86 != 6 ) + { + printk(XENLOG_WARNING "VPMU: only family 6 is supported\n"); + return -EINVAL; } arch_pmc_cnt = core2_get_arch_pmc_count(); @@ -1017,10 +999,20 @@ full_width_write = (caps >> 13) & 1; fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1); + if ( version == 2 ) + fixed_ctrl_mask |= 0x444; fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1); + global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) | + ((1ULL << arch_pmc_cnt) - 1)); global_ovf_ctrl_mask = ~(0xC000000000000000 | (((1ULL << fixed_pmc_cnt) - 1) << 32) | ((1ULL << arch_pmc_cnt) - 1)); + if ( version > 2 ) + /* + * Even though we don't support Uncore counters guests should be + * able to clear all available overflows. + */ + global_ovf_ctrl_mask &= ~(1ULL << 61); regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) + sizeof(uint64_t) * fixed_pmc_cnt + diff -Nru xen-4.6.0/xen/arch/x86/domain.c xen-4.6.5/xen/arch/x86/domain.c --- xen-4.6.0/xen/arch/x86/domain.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/domain.c 2017-03-07 16:19:05.000000000 +0000 @@ -64,7 +64,6 @@ #include DEFINE_PER_CPU(struct vcpu *, curr_vcpu); -DEFINE_PER_CPU(unsigned long, cr4); static void default_idle(void); void (*pm_idle) (void) __read_mostly = default_idle; @@ -375,6 +374,8 @@ release_compat_l4(v); } + d->arch.x87_fip_width = cpu_has_fpu_sel ? 0 : 8; + return 0; } @@ -408,6 +409,8 @@ domain_set_alloc_bitsize(d); + d->arch.x87_fip_width = 4; + return 0; undo_and_fail: @@ -635,21 +638,18 @@ } spin_lock_init(&d->arch.e820_lock); + if ( (rc = psr_domain_init(d)) != 0 ) + goto fail; + if ( has_hvm_container_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) - { - iommu_domain_destroy(d); goto fail; - } } else /* 64-bit PV guest by default. 
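For the vpmu_intel.c masks set up earlier in this hunk: MSR_CORE_PERF_GLOBAL_CTRL carries one enable bit per general-purpose counter starting at bit 0 and one per fixed-function counter starting at bit 32 (Intel SDM layout); global_ctrl_mask is simply the complement of the implemented bits, so a guest write touching any reserved bit fails the "& global_ctrl_mask" test with -EINVAL. Sketch of the layout:

    /*
     * GLOBAL_CTRL layout:
     *   bits [arch_pmc_cnt-1:0]       - general counter enables
     *   bits [32+fixed_pmc_cnt-1:32]  - fixed counter enables
     * All remaining bits are reserved and must be written as zero.
     */
    uint64_t implemented = (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                           ((1ULL << arch_pmc_cnt) - 1);
    uint64_t global_ctrl_mask = ~implemented;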
*/ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0; - if ( (rc = psr_domain_init(d)) != 0 ) - goto fail; - /* initialize default tsc behavior in case tools don't */ tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0); spin_lock_init(&d->arch.vtsc_lock); @@ -657,18 +657,26 @@ /* PV/PVH guests get an emulated PIT too for video BIOSes to use. */ pit_init(d, cpu_khz); + /* + * If the FPU does not save FCS/FDS then we can always + * save/restore the 64-bit FIP/FDP and ignore the selectors. + */ + d->arch.x87_fip_width = cpu_has_fpu_sel ? 0 : 8; + return 0; fail: d->is_dying = DOMDYING_dead; + psr_domain_free(d); + iommu_domain_destroy(d); cleanup_domain_irq_mapping(d); free_xenheap_page(d->shared_info); + xfree(d->arch.cpuids); if ( paging_initialised ) paging_final_teardown(d); free_perdomain_mappings(d); if ( is_pv_domain(d) ) free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab); - psr_domain_free(d); return rc; } @@ -681,6 +689,7 @@ hvm_domain_destroy(d); xfree(d->arch.e820); + xfree(d->arch.cpuids); free_domain_pirqs(d); if ( !is_idle_domain(d) ) @@ -787,7 +796,13 @@ { if ( !compat ) { - if ( !is_canonical_address(c.nat->user_regs.eip) || + if ( !is_canonical_address(c.nat->user_regs.rip) || + !is_canonical_address(c.nat->user_regs.rsp) || + !is_canonical_address(c.nat->kernel_sp) || + (c.nat->ldt_ents && !is_canonical_address(c.nat->ldt_base)) || + !is_canonical_address(c.nat->fs_base) || + !is_canonical_address(c.nat->gs_base_kernel) || + !is_canonical_address(c.nat->gs_base_user) || !is_canonical_address(c.nat->event_callback_eip) || !is_canonical_address(c.nat->syscall_callback_eip) || !is_canonical_address(c.nat->failsafe_callback_eip) ) @@ -851,6 +866,17 @@ if ( v->arch.xsave_area ) v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; } + else if ( v->arch.xsave_area ) + memset(&v->arch.xsave_area->xsave_hdr, 0, + sizeof(v->arch.xsave_area->xsave_hdr)); + else + { + typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt; + + memset(fpu_sse, 0, sizeof(*fpu_sse)); + fpu_sse->fcw = FCW_DEFAULT; + fpu_sse->mxcsr = MXCSR_DEFAULT; + } if ( !compat ) { @@ -980,7 +1006,48 @@ goto out; if ( v->vcpu_id == 0 ) + { + /* + * In the restore case we need to deal with L4 pages which got + * initialized with m2p_strict still clear (and which hence lack the + * correct initial RO_MPT_VIRT_{START,END} L4 entry). 
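The canonical-address checks added to arch_set_info_guest() above all reduce to the same predicate: on x86-64 a virtual address is canonical when bits 63:47 are uniform, i.e. the value is the sign extension of a 48-bit address. One common way to express that (a sketch, equivalent in effect to what the hypervisor's helper needs to test):

    static inline bool canonical(uint64_t va)
    {
        /* Arithmetic shifts: both sides are all-ones or all-zeroes. */
        return ((int64_t)va >> 47) == ((int64_t)va >> 63);
    }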
+ */ + if ( d != current->domain && !VM_ASSIST(d, m2p_strict) && + is_pv_domain(d) && !is_pv_32bit_domain(d) && + test_bit(VMASST_TYPE_m2p_strict, &c.nat->vm_assist) && + atomic_read(&d->arch.pv_domain.nr_l4_pages) ) + { + bool_t done = 0; + + spin_lock_recursive(&d->page_alloc_lock); + + for ( i = 0; ; ) + { + struct page_info *page = page_list_remove_head(&d->page_list); + + if ( page_lock(page) ) + { + if ( (page->u.inuse.type_info & PGT_type_mask) == + PGT_l4_page_table ) + done = !fill_ro_mpt(page_to_mfn(page)); + + page_unlock(page); + } + + page_list_add_tail(page, &d->page_list); + + if ( done || (!(++i & 0xff) && hypercall_preempt_check()) ) + break; + } + + spin_unlock_recursive(&d->page_alloc_lock); + + if ( !done ) + return -ERESTART; + } + d->vm_assist = c(vm_assist); + } rc = put_old_guest_table(current); if ( rc ) diff -Nru xen-4.6.0/xen/arch/x86/domctl.c xen-4.6.5/xen/arch/x86/domctl.c --- xen-4.6.0/xen/arch/x86/domctl.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/domctl.c 2017-03-07 16:19:05.000000000 +0000 @@ -464,7 +464,7 @@ case XEN_DOMCTL_ioport_mapping: { - struct hvm_iommu *hd; + struct domain_iommu *hd; unsigned int fgp = domctl->u.ioport_mapping.first_gport; unsigned int fmp = domctl->u.ioport_mapping.first_mport; unsigned int np = domctl->u.ioport_mapping.nr_ports; @@ -490,7 +490,7 @@ if ( ret ) break; - hd = domain_hvm_iommu(d); + hd = dom_iommu(d); if ( add ) { printk(XENLOG_G_INFO @@ -869,19 +869,25 @@ unsigned int size; ret = 0; - vcpu_pause(v); - size = PV_XSAVE_SIZE(v->arch.xcr0_accum); if ( (!evc->size && !evc->xfeature_mask) || guest_handle_is_null(evc->buffer) ) { + /* + * A query for the size of buffer to use. Must return the + * maximum size we ever might hand back to userspace, bearing + * in mind that the vcpu might increase its xcr0_accum between + * this query for size, and the following query for data. + */ evc->xfeature_mask = xfeature_mask; - evc->size = size; - vcpu_unpause(v); + evc->size = PV_XSAVE_SIZE(xfeature_mask); goto vcpuextstate_out; } - if ( evc->size != size || evc->xfeature_mask != xfeature_mask ) + vcpu_pause(v); + size = PV_XSAVE_SIZE(v->arch.xcr0_accum); + + if ( evc->size < size || evc->xfeature_mask != xfeature_mask ) ret = -EINVAL; if ( !ret && copy_to_guest_offset(evc->buffer, offset, @@ -902,6 +908,10 @@ ret = -EFAULT; vcpu_unpause(v); + + /* Specify how much data we actually wrote into the buffer. 
*/ + if ( !ret ) + evc->size = size; } else { diff -Nru xen-4.6.0/xen/arch/x86/flushtlb.c xen-4.6.5/xen/arch/x86/flushtlb.c --- xen-4.6.0/xen/arch/x86/flushtlb.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/flushtlb.c 2017-03-07 16:19:05.000000000 +0000 @@ -135,7 +135,8 @@ if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) sz = 1UL << (order + PAGE_SHIFT); - if ( !(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) && + if ( (!(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) || + (flags & FLUSH_VA_VALID)) && c->x86_clflush_size && c->x86_cache_size && sz && ((sz >> 10) < c->x86_cache_size) ) { diff -Nru xen-4.6.0/xen/arch/x86/hvm/emulate.c xen-4.6.5/xen/arch/x86/hvm/emulate.c --- xen-4.6.0/xen/arch/x86/hvm/emulate.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/emulate.c 2017-03-07 16:19:05.000000000 +0000 @@ -94,7 +94,7 @@ }; static int hvmemul_do_io( - bool_t is_mmio, paddr_t addr, unsigned long reps, unsigned int size, + bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size, uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data) { struct vcpu *curr = current; @@ -103,7 +103,7 @@ .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO, .addr = addr, .size = size, - .count = reps, + .count = *reps, .dir = dir, .df = df, .data = data, @@ -132,10 +132,10 @@ p = vio->io_req; /* Verify the emulation request has been correctly re-issued */ - if ( (p.type != is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO) || + if ( (p.type != (is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO)) || (p.addr != addr) || (p.size != size) || - (p.count != reps) || + (p.count > *reps) || (p.dir != dir) || (p.df != df) || (p.data_is_ptr != data_is_addr) ) @@ -143,6 +143,8 @@ if ( data_is_addr ) return X86EMUL_UNHANDLEABLE; + + *reps = p.count; goto finish_access; default: return X86EMUL_UNHANDLEABLE; @@ -160,6 +162,13 @@ rc = hvm_io_intercept(&p); + /* + * p.count may have got reduced (see hvm_process_io_intercept()) - inform + * our callers and mirror this into latched state. + */ + ASSERT(p.count <= *reps); + *reps = vio->io_req.count = p.count; + switch ( rc ) { case X86EMUL_OKAY: @@ -213,7 +222,7 @@ BUG_ON(buffer == NULL); - rc = hvmemul_do_io(is_mmio, addr, *reps, size, dir, df, 0, + rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0, (uintptr_t)buffer); if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ ) memset(buffer, 0xff, size); @@ -304,13 +313,13 @@ count = 1; } - rc = hvmemul_do_io(is_mmio, addr, count, size, dir, df, 1, + rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1, ram_gpa); + if ( rc == X86EMUL_OKAY ) - { v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps); - *reps = count; - } + + *reps = count; out: while ( nr_pages ) @@ -517,6 +526,8 @@ ? 1 : 4096); reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + if ( IS_ERR(reg) ) + return -PTR_ERR(reg); if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) ) { @@ -744,7 +755,7 @@ rc = hvmemul_virtual_to_linear( seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !bytes ) return rc; if ( ((access_type != hvm_access_insn_fetch ? vio->mmio_access.read_access @@ -810,13 +821,17 @@ container_of(ctxt, struct hvm_emulate_ctxt, ctxt); unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip; - /* Fall back if requested bytes are not in the prefetch cache. */ - if ( unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) ) + /* + * Fall back if requested bytes are not in the prefetch cache. + * But always perform the (fake) read when bytes == 0. 
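On the hvmemul_do_io() signature change in this hunk: reps becomes an in/out parameter because an intercept handler may legitimately complete fewer iterations of a REP instruction than requested (see the hvm_process_io_intercept() change further down), and the emulator then retries from where the batch stopped. A caller-side sketch of the contract, with hypothetical variable names:

    unsigned long done = requested;          /* in: iterations wanted */
    rc = hvmemul_do_io(is_mmio, addr, &done, size, dir, df, 0,
                       (uintptr_t)buffer);
    if ( rc == X86EMUL_OKAY && done < requested )
        /* partial batch: the emulator re-issues for the remainder */ ;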
+ */ + if ( !bytes || + unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) ) { int rc = __hvmemul_read(seg, offset, p_data, bytes, hvm_access_insn_fetch, hvmemul_ctxt); - if ( rc == X86EMUL_OKAY ) + if ( rc == X86EMUL_OKAY && bytes ) { ASSERT(insn_off + bytes <= sizeof(hvmemul_ctxt->insn_buf)); memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes); @@ -848,7 +863,7 @@ rc = hvmemul_virtual_to_linear( seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !bytes ) return rc; if ( vio->mmio_access.write_access && @@ -1347,6 +1362,10 @@ struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + + if ( IS_ERR(sreg) ) + return -PTR_ERR(sreg); + memcpy(reg, sreg, sizeof(struct segment_register)); return X86EMUL_OKAY; } @@ -1360,6 +1379,9 @@ container_of(ctxt, struct hvm_emulate_ctxt, ctxt); struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + if ( IS_ERR(sreg) ) + return -PTR_ERR(sreg); + memcpy(sreg, reg, sizeof(struct segment_register)); __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty); @@ -1535,6 +1557,7 @@ switch ( type ) { case X86EMUL_FPU_fpu: + case X86EMUL_FPU_wait: break; case X86EMUL_FPU_mmx: if ( !cpu_has_mmx ) @@ -1542,7 +1565,6 @@ break; case X86EMUL_FPU_xmm: if ( !cpu_has_xmm || - (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) || !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) ) return X86EMUL_UNHANDLEABLE; break; @@ -1598,9 +1620,12 @@ { int rc; + if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc ) + return X86EMUL_UNHANDLEABLE; rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs); - if ( rc != X86EMUL_OKAY ) - hvmemul_inject_hw_exception(TRAP_invalid_op, 0, ctxt); + if ( rc == X86EMUL_EXCEPTION ) + hvmemul_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, + ctxt); return rc; } @@ -1778,6 +1803,52 @@ return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write); } +int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla) +{ + static const struct x86_emulate_ops hvm_intercept_ops_mmcfg = { + .read = x86emul_unhandleable_rw, + .insn_fetch = hvmemul_insn_fetch, + .write = mmcfg_intercept_write, + }; + static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = { + .read = x86emul_unhandleable_rw, + .insn_fetch = hvmemul_insn_fetch, + .write = mmio_ro_emulated_write + }; + struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla }; + struct hvm_emulate_ctxt ctxt; + const struct x86_emulate_ops *ops; + unsigned int seg, bdf; + int rc; + + if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) ) + { + mmio_ro_ctxt.seg = seg; + mmio_ro_ctxt.bdf = bdf; + ops = &hvm_intercept_ops_mmcfg; + } + else + ops = &hvm_ro_emulate_ops_mmio; + + hvm_emulate_prepare(&ctxt, guest_cpu_user_regs()); + ctxt.ctxt.data = &mmio_ro_ctxt; + rc = _hvm_emulate_one(&ctxt, ops); + switch ( rc ) + { + case X86EMUL_UNHANDLEABLE: + hvm_dump_emulation_state(XENLOG_G_WARNING "MMCFG", &ctxt); + break; + case X86EMUL_EXCEPTION: + if ( ctxt.exn_pending ) + hvm_inject_trap(&ctxt.trap); + /* fallthrough */ + default: + hvm_emulate_writeback(&ctxt); + } + + return rc; +} + void hvm_mem_access_emulate_one(enum emul_kind kind, unsigned int trapnr, unsigned int errcode) { @@ -1852,13 +1923,22 @@ } } +/* + * Callers which pass a known in-range x86_segment can rely on the return + * pointer being valid. Other callers must explicitly check for errors. 
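The IS_ERR()/PTR_ERR() checks added in this hunk rely on the Linux-style encoding from xen/err.h: a small negative error code is cast into a pointer in the top, necessarily invalid, range of the address space, so a single return value can carry either a valid pointer or an error. Usage sketch matching the hunk:

    struct segment_register *reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);

    if ( IS_ERR(reg) )            /* out-of-range segment index */
        return -PTR_ERR(reg);     /* unfolds back to X86EMUL_UNHANDLEABLE */
    /* otherwise reg is valid */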
+ */ struct segment_register *hvmemul_get_seg_reg( enum x86_segment seg, struct hvm_emulate_ctxt *hvmemul_ctxt) { - if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) ) - hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); - return &hvmemul_ctxt->seg_reg[seg]; + unsigned int idx = seg; + + if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) ) + return ERR_PTR(-X86EMUL_UNHANDLEABLE); + + if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) ) + hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]); + return &hvmemul_ctxt->seg_reg[idx]; } static const char *guest_x86_mode_to_str(int mode) diff -Nru xen-4.6.0/xen/arch/x86/hvm/hvm.c xen-4.6.5/xen/arch/x86/hvm/hvm.c --- xen-4.6.0/xen/arch/x86/hvm/hvm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/hvm.c 2017-03-07 16:19:05.000000000 +0000 @@ -308,13 +308,20 @@ } delta_tsc = guest_tsc - tsc; - v->arch.hvm_vcpu.msr_tsc_adjust += delta_tsc - - v->arch.hvm_vcpu.cache_tsc_offset; v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc; hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, at_tsc); } +static void hvm_set_guest_tsc_msr(struct vcpu *v, u64 guest_tsc) +{ + uint64_t tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset; + + hvm_set_guest_tsc(v, guest_tsc); + v->arch.hvm_vcpu.msr_tsc_adjust += v->arch.hvm_vcpu.cache_tsc_offset + - tsc_offset; +} + void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust) { v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust @@ -448,7 +455,10 @@ { while ( sv->pending ) { - switch ( p->state ) + unsigned int state = p->state; + + rmb(); + switch ( state ) { case STATE_IOREQ_NONE: /* @@ -459,18 +469,15 @@ hvm_io_assist(sv, ~0ul); break; case STATE_IORESP_READY: /* IORESP_READY -> NONE */ - rmb(); /* see IORESP_READY /then/ read contents of ioreq */ p->state = STATE_IOREQ_NONE; hvm_io_assist(sv, p->data); break; case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ case STATE_IOREQ_INPROCESS: - wait_on_xen_event_channel(sv->ioreq_evtchn, - (p->state != STATE_IOREQ_READY) && - (p->state != STATE_IOREQ_INPROCESS)); + wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state); break; default: - gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state); + gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state); sv->pending = 0; domain_crash(sv->vcpu->domain); return 0; /* bail */ @@ -637,6 +644,30 @@ return 0; } +bool_t is_ioreq_server_page(struct domain *d, const struct page_info *page) +{ + const struct hvm_ioreq_server *s; + bool_t found = 0; + + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); + + list_for_each_entry ( s, + &d->arch.hvm_domain.ioreq_server.list, + list_entry ) + { + if ( (s->ioreq.va && s->ioreq.page == page) || + (s->bufioreq.va && s->bufioreq.page == page) ) + { + found = 1; + break; + } + } + + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); + + return found; +} + static void hvm_remove_ioreq_gmfn( struct domain *d, struct hvm_ioreq_page *iorp) { @@ -1118,7 +1149,7 @@ goto fail1; domain_pause(d); - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -EEXIST; if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL ) @@ -1141,14 +1172,14 @@ if ( id ) *id = s->id; - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); domain_unpause(d); return 0; fail3: fail2: - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + 
spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); domain_unpause(d); xfree(s); @@ -1161,7 +1192,7 @@ struct hvm_ioreq_server *s; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; list_for_each_entry ( s, @@ -1190,7 +1221,7 @@ break; } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1203,7 +1234,7 @@ struct hvm_ioreq_server *s; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; list_for_each_entry ( s, @@ -1228,7 +1259,7 @@ break; } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1239,7 +1270,7 @@ struct hvm_ioreq_server *s; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; list_for_each_entry ( s, @@ -1279,7 +1310,7 @@ } } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1290,7 +1321,7 @@ struct hvm_ioreq_server *s; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; list_for_each_entry ( s, @@ -1330,7 +1361,7 @@ } } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1341,7 +1372,7 @@ struct list_head *entry; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); rc = -ENOENT; list_for_each ( entry, @@ -1370,7 +1401,7 @@ break; } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1379,7 +1410,7 @@ struct hvm_ioreq_server *s; int rc; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); list_for_each_entry ( s, &d->arch.hvm_domain.ioreq_server.list, @@ -1392,7 +1423,7 @@ goto fail; } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return 0; @@ -1402,7 +1433,7 @@ list_entry ) hvm_ioreq_server_remove_vcpu(s, v); - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -1411,21 +1442,21 @@ { struct hvm_ioreq_server *s; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); list_for_each_entry ( s, &d->arch.hvm_domain.ioreq_server.list, list_entry ) hvm_ioreq_server_remove_vcpu(s, v); - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); } static void hvm_destroy_all_ioreq_servers(struct domain *d) { struct hvm_ioreq_server *s, *next; - spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); /* No need to domain_pause() as the domain is being torn down */ @@ -1448,7 +1479,7 @@ xfree(s); } - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); } static int hvm_replace_event_channel(struct vcpu *v, domid_t remote_domid, @@ -1472,7 +1503,7 @@ struct hvm_ioreq_server *s; int rc = 0; - 
spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); /* * Lack of ioreq server is not a failure. HVM_PARAM_DM_DOMAIN will @@ -1521,7 +1552,7 @@ domain_unpause(d); done: - spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); return rc; } @@ -2811,6 +2842,7 @@ gprintk(XENLOG_INFO, "Triple fault - invoking HVM shutdown action %d\n", reason); + vcpu_show_execution_state(v); domain_shutdown(d, reason); } @@ -3089,6 +3121,14 @@ goto out_put_gfn; } + if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) && + npfec.write_access && npfec.present && + (hvm_emulate_one_mmio(mfn_x(mfn), gla) == X86EMUL_OKAY) ) + { + rc = 1; + goto out_put_gfn; + } + /* If we fell through, the vcpu will retry now that access restrictions have * been removed. It may fault again if the p2m entry type still requires so. * Otherwise, this is an error condition. */ @@ -3170,6 +3210,30 @@ return X86EMUL_EXCEPTION; } + if ( (value & EFER_LME) && !(v->arch.hvm_vcpu.guest_efer & EFER_LME) ) + { + struct segment_register cs; + + hvm_get_segment_register(v, x86_seg_cs, &cs); + + /* + * %cs may be loaded with both .D and .L set in legacy mode, and both + * are captured in the VMCS/VMCB. + * + * If a guest does this and then tries to transition into long mode, + * the vmentry from setting LME fails due to invalid guest state, + * because %cr0.PG is still clear. + * + * When LME becomes set, clobber %cs.L to keep the guest firmly in + * compatibility mode until it reloads %cs itself. + */ + if ( cs.attr.fields.l ) + { + cs.attr.fields.l = 0; + hvm_set_segment_register(v, x86_seg_cs, &cs); + } + } + if ( nestedhvm_enabled(v->domain) && cpu_has_svm && ((value & EFER_SVME) == 0 ) && ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_SVME) ) @@ -3599,7 +3663,7 @@ * Certain of them are not done in native real mode anyway. */ addr = (uint32_t)(addr + reg->base); - last_byte = (uint32_t)addr + bytes - 1; + last_byte = (uint32_t)addr + bytes - !!bytes; if ( last_byte < addr ) return 0; } @@ -3609,6 +3673,10 @@ * COMPATIBILITY MODE: Apply segment checks and add base. */ + /* Segment not valid for use (cooked meaning of .p)? */ + if ( !reg->attr.fields.p ) + return 0; + switch ( access_type ) { case hvm_access_read: @@ -3623,7 +3691,7 @@ break; } - last_byte = (uint32_t)offset + bytes - 1; + last_byte = (uint32_t)offset + bytes - !!bytes; /* Is this a grows-down data segment? Special limit check if so. */ if ( (reg->attr.fields.type & 0xc) == 0x4 ) @@ -3654,7 +3722,7 @@ if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) ) addr += reg->base; - last_byte = addr + bytes - 1; + last_byte = addr + bytes - !!bytes; if ( !is_canonical_address(addr) || last_byte < addr || !is_canonical_address(last_byte) ) return 0; @@ -3776,17 +3844,16 @@ } static int hvm_load_segment_selector( - enum x86_segment seg, uint16_t sel) + enum x86_segment seg, uint16_t sel, unsigned int eflags) { struct segment_register desctab, cs, segr; struct desc_struct *pdesc, desc; u8 dpl, rpl, cpl; bool_t writable; int fault_type = TRAP_invalid_tss; - struct cpu_user_regs *regs = guest_cpu_user_regs(); struct vcpu *v = current; - if ( regs->eflags & X86_EFLAGS_VM ) + if ( eflags & X86_EFLAGS_VM ) { segr.sel = sel; segr.base = (uint32_t)sel << 4; @@ -3814,6 +3881,10 @@ hvm_get_segment_register( v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab); + /* Segment not valid for use (cooked meaning of .p)? 
*/ + if ( !desctab.attr.fields.p ) + goto fail; + /* Check against descriptor table limit. */ if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto fail; @@ -3825,13 +3896,6 @@ do { desc = *pdesc; - /* Segment present in memory? */ - if ( !(desc.b & _SEGMENT_P) ) - { - fault_type = TRAP_no_segment; - goto unmap_and_fail; - } - /* LDT descriptor is a system segment. All others are code/data. */ if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) ) goto unmap_and_fail; @@ -3876,6 +3940,14 @@ goto unmap_and_fail; break; } + + /* Segment present in memory? */ + if ( !(desc.b & _SEGMENT_P) ) + { + fault_type = (seg != x86_seg_ss) ? TRAP_no_segment + : TRAP_stack_error; + goto unmap_and_fail; + } } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */ writable && /* except if we are to discard writes */ (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) ); @@ -3962,12 +4034,6 @@ if ( tr.attr.fields.g ) tr.limit = (tr.limit << 12) | 0xfffu; - if ( !tr.attr.fields.p ) - { - hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8); - goto out; - } - if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) ) { hvm_inject_hw_exception( @@ -3976,6 +4042,12 @@ goto out; } + if ( !tr.attr.fields.p ) + { + hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8); + goto out; + } + if ( tr.limit < (sizeof(tss)-1) ) { hvm_inject_hw_exception(TRAP_invalid_tss, tss_sel & 0xfff8); @@ -4032,6 +4104,8 @@ if ( rc != HVMCOPY_okay ) goto out; + if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt, 0) ) + goto out; if ( hvm_set_cr3(tss.cr3, 1) ) goto out; @@ -4054,13 +4128,12 @@ } exn_raised = 0; - if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt) || - hvm_load_segment_selector(x86_seg_es, tss.es) || - hvm_load_segment_selector(x86_seg_cs, tss.cs) || - hvm_load_segment_selector(x86_seg_ss, tss.ss) || - hvm_load_segment_selector(x86_seg_ds, tss.ds) || - hvm_load_segment_selector(x86_seg_fs, tss.fs) || - hvm_load_segment_selector(x86_seg_gs, tss.gs) ) + if ( hvm_load_segment_selector(x86_seg_es, tss.es, tss.eflags) || + hvm_load_segment_selector(x86_seg_cs, tss.cs, tss.eflags) || + hvm_load_segment_selector(x86_seg_ss, tss.ss, tss.eflags) || + hvm_load_segment_selector(x86_seg_ds, tss.ds, tss.eflags) || + hvm_load_segment_selector(x86_seg_fs, tss.fs, tss.eflags) || + hvm_load_segment_selector(x86_seg_gs, tss.gs, tss.eflags) ) exn_raised = 1; rc = hvm_copy_to_guest_virt( @@ -4071,7 +4144,7 @@ goto out; if ( (tss.trace & 1) && !exn_raised ) - hvm_inject_hw_exception(TRAP_debug, tss_sel & 0xfff8); + hvm_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE); tr.attr.fields.type = 0xb; /* busy 32-bit tss */ hvm_set_segment_register(v, x86_seg_tr, &tr); @@ -4510,9 +4583,7 @@ *ebx &= ~cpufeat_mask(X86_FEATURE_SMAP); /* Don't expose MPX to hvm when VMX support is not available */ - if ( (count == 0) && - (!(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) || - !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS)) ) + if ( (count == 0) && !cpu_has_vmx_mpx ) *ebx &= ~cpufeat_mask(X86_FEATURE_MPX); /* Don't expose INVPCID to non-hap hvm. 
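On the MPX hunk above: exposing MPX safely requires the VMCS to be able to load BNDCFGS on VM entry and clear it on VM exit, which is exactly the pair of controls the removed open-coded test checked. The cpu_has_vmx_mpx predicate replacing it presumably collapses the same two checks, along the lines of:

    /* Assumed definition; mirrors the open-coded test it replaces. */
    #define cpu_has_vmx_mpx                                   \
        ((vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) &&      \
         (vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS))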
*/ @@ -4559,8 +4630,7 @@ break; case 0x80000008: - count = cpuid_eax(0x80000008); - count = (count >> 16) & 0xff ?: count & 0xff; + count = d->arch.paging.gfn_bits + PAGE_SHIFT; if ( (*eax & 0xff) > count ) *eax = (*eax & ~0xff) | count; @@ -4771,7 +4841,7 @@ break; case MSR_IA32_TSC: - hvm_set_guest_tsc(v, msr_content); + hvm_set_guest_tsc_msr(v, msr_content); break; case MSR_IA32_TSC_ADJUST: @@ -5867,6 +5937,7 @@ case HVM_PARAM_VM_GENERATION_ID_ADDR: case HVM_PARAM_STORE_EVTCHN: case HVM_PARAM_CONSOLE_EVTCHN: + case HVM_PARAM_X87_FIP_WIDTH: break; /* * The following parameters must not be set by the guest @@ -6062,6 +6133,14 @@ break; } + case HVM_PARAM_X87_FIP_WIDTH: + if ( a.value != 0 && a.value != 4 && a.value != 8 ) + { + rc = -EINVAL; + break; + } + d->arch.x87_fip_width = a.value; + break; } if ( rc != 0 ) @@ -6098,6 +6177,7 @@ case HVM_PARAM_CONSOLE_PFN: case HVM_PARAM_CONSOLE_EVTCHN: case HVM_PARAM_ALTP2M: + case HVM_PARAM_X87_FIP_WIDTH: break; /* * The following parameters must not be read by the guest @@ -6147,6 +6227,9 @@ case HVM_PARAM_ACPI_S_STATE: a.value = d->arch.hvm_domain.is_s3_suspended ? 3 : 0; break; + case HVM_PARAM_X87_FIP_WIDTH: + a.value = d->arch.x87_fip_width; + break; case HVM_PARAM_IOREQ_PFN: case HVM_PARAM_BUFIOREQ_PFN: case HVM_PARAM_BUFIOREQ_EVTCHN: diff -Nru xen-4.6.0/xen/arch/x86/hvm/intercept.c xen-4.6.5/xen/arch/x86/hvm/intercept.c --- xen-4.6.0/xen/arch/x86/hvm/intercept.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/intercept.c 2017-03-07 16:19:05.000000000 +0000 @@ -148,8 +148,8 @@ ASSERT_UNREACHABLE(); /* fall through */ default: - rc = X86EMUL_UNHANDLEABLE; - break; + domain_crash(current->domain); + return X86EMUL_UNHANDLEABLE; } if ( rc != X86EMUL_OKAY ) break; @@ -178,8 +178,8 @@ ASSERT_UNREACHABLE(); /* fall through */ default: - rc = X86EMUL_UNHANDLEABLE; - break; + domain_crash(current->domain); + return X86EMUL_UNHANDLEABLE; } if ( rc != X86EMUL_OKAY ) break; @@ -196,8 +196,20 @@ } } - if ( i != 0 && rc == X86EMUL_UNHANDLEABLE ) - domain_crash(current->domain); + if ( i ) + { + p->count = i; + rc = X86EMUL_OKAY; + } + else if ( rc == X86EMUL_UNHANDLEABLE ) + { + /* + * Don't forward entire batches to the device model: This would + * prevent the internal handlers to see subsequent iterations of + * the request. 
+ */ + p->count = 1; + } return rc; } diff -Nru xen-4.6.0/xen/arch/x86/hvm/io.c xen-4.6.5/xen/arch/x86/hvm/io.c --- xen-4.6.0/xen/arch/x86/hvm/io.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/io.c 2017-03-07 16:19:05.000000000 +0000 @@ -173,12 +173,12 @@ const ioreq_t *p) { struct vcpu *curr = current; - struct hvm_iommu *hd = domain_hvm_iommu(curr->domain); + const struct domain_iommu *dio = dom_iommu(curr->domain); struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; struct g2m_ioport *g2m_ioport; unsigned int start, end; - list_for_each_entry( g2m_ioport, &hd->arch.g2m_ioport_list, list ) + list_for_each_entry( g2m_ioport, &dio->arch.g2m_ioport_list, list ) { start = g2m_ioport->gport; end = start + g2m_ioport->np; diff -Nru xen-4.6.0/xen/arch/x86/hvm/irq.c xen-4.6.5/xen/arch/x86/hvm/irq.c --- xen-4.6.0/xen/arch/x86/hvm/irq.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/irq.c 2017-03-07 16:19:05.000000000 +0000 @@ -382,7 +382,8 @@ spin_unlock(&d->arch.hvm_domain.irq_lock); - dprintk(XENLOG_G_INFO, "Dom%u callback via changed to ", d->domain_id); +#ifndef NDEBUG + printk(XENLOG_G_INFO "Dom%u callback via changed to ", d->domain_id); switch ( via_type ) { case HVMIRQ_callback_gsi: @@ -398,6 +399,7 @@ printk("None\n"); break; } +#endif } struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v) diff -Nru xen-4.6.0/xen/arch/x86/hvm/mtrr.c xen-4.6.5/xen/arch/x86/hvm/mtrr.c --- xen-4.6.0/xen/arch/x86/hvm/mtrr.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/mtrr.c 2017-03-07 16:19:05.000000000 +0000 @@ -26,8 +26,6 @@ #include #include -static uint32_t size_or_mask; - /* Get page attribute fields (PAn) from PAT MSR. */ #define pat_cr_2_paf(pat_cr,n) ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff) @@ -77,61 +75,28 @@ static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] = { [0 ... PAT_TYPE_NUMS-1] = INVALID_MEM_TYPE }; -static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr, - uint64_t *base, uint64_t *end) +bool_t is_var_mtrr_overlapped(const struct mtrr_state *m) { - uint32_t mask_lo = (uint32_t)mask_msr; - uint32_t mask_hi = (uint32_t)(mask_msr >> 32); - uint32_t base_lo = (uint32_t)base_msr; - uint32_t base_hi = (uint32_t)(base_msr >> 32); - uint32_t size; - - if ( !(mask_lo & MTRR_PHYSMASK_VALID) ) - { - /* Invalid (i.e. free) range */ - *base = 0; - *end = 0; - return; - } - - /* Work out the shifted address mask. */ - mask_lo = (size_or_mask | (mask_hi << (32 - PAGE_SHIFT)) | - (mask_lo >> PAGE_SHIFT)); - - /* This works correctly if size is a power of two (a contiguous range). 
*/ - size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *end = *base + size - 1; -} - -bool_t is_var_mtrr_overlapped(struct mtrr_state *m) -{ - int32_t seg, i; - uint64_t phys_base, phys_mask, phys_base_pre, phys_mask_pre; - uint64_t base_pre, end_pre, base, end; - uint8_t num_var_ranges = (uint8_t)m->mtrr_cap; + unsigned int seg, i; + unsigned int num_var_ranges = (uint8_t)m->mtrr_cap; for ( i = 0; i < num_var_ranges; i++ ) { - phys_base_pre = ((uint64_t*)m->var_ranges)[i*2]; - phys_mask_pre = ((uint64_t*)m->var_ranges)[i*2 + 1]; + uint64_t base1 = m->var_ranges[i].base >> PAGE_SHIFT; + uint64_t mask1 = m->var_ranges[i].mask >> PAGE_SHIFT; - get_mtrr_range(phys_base_pre, phys_mask_pre, - &base_pre, &end_pre); + if ( !(m->var_ranges[i].mask & MTRR_PHYSMASK_VALID) ) + continue; for ( seg = i + 1; seg < num_var_ranges; seg ++ ) { - phys_base = ((uint64_t*)m->var_ranges)[seg*2]; - phys_mask = ((uint64_t*)m->var_ranges)[seg*2 + 1]; + uint64_t base2 = m->var_ranges[seg].base >> PAGE_SHIFT; + uint64_t mask2 = m->var_ranges[seg].mask >> PAGE_SHIFT; - get_mtrr_range(phys_base, phys_mask, - &base, &end); + if ( !(m->var_ranges[seg].mask & MTRR_PHYSMASK_VALID) ) + continue; - if ( ((base_pre != end_pre) && (base != end)) - || ((base >= base_pre) && (base <= end_pre)) - || ((end >= base_pre) && (end <= end_pre)) - || ((base_pre >= base) && (base_pre <= end)) - || ((end_pre >= base) && (end_pre <= end)) ) + if ( (base1 & mask1 & mask2) == (base2 & mask2 & mask1) ) { /* MTRR is overlapped. */ return 1; @@ -168,8 +133,6 @@ } } - size_or_mask = ~((1 << (paddr_bits - PAGE_SHIFT)) - 1); - return 0; } __initcall(hvm_mtrr_pat_init); @@ -807,8 +770,19 @@ if ( v->domain != d ) v = d->vcpu ? d->vcpu[0] : NULL; - if ( !mfn_valid(mfn_x(mfn)) ) - return MTRR_TYPE_UNCACHABLE; + /* Mask, not add, for order so it works with INVALID_MFN on unmapping */ + if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), + mfn_x(mfn) | ((1UL << order) - 1)) ) + { + if ( !order || rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn), + mfn_x(mfn) | ((1UL << order) - 1)) ) + { + *ipat = 1; + return MTRR_TYPE_UNCACHABLE; + } + /* Force invalid memory type so resolve_misconfig() will split it */ + return -1; + } switch ( hvm_get_mem_pinned_cacheattr(d, gfn, order, &type) ) { @@ -819,15 +793,6 @@ return -1; } - if ( !need_iommu(d) && !cache_flush_permitted(d) ) - { - ASSERT(!direct_mmio || - !((mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> - order)); - *ipat = 1; - return MTRR_TYPE_WRBACK; - } - if ( direct_mmio ) { if ( (mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> order ) @@ -837,6 +802,21 @@ *ipat = 1; return MTRR_TYPE_WRBACK; } + + if ( !mfn_valid(mfn_x(mfn)) ) + { + *ipat = 1; + return MTRR_TYPE_UNCACHABLE; + } + + if ( !need_iommu(d) && !cache_flush_permitted(d) ) + { + ASSERT(!direct_mmio || + !((mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> + order)); + *ipat = 1; + return MTRR_TYPE_WRBACK; + } gmtrr_mtype = is_hvm_domain(d) && v ? get_mtrr_type(&v->arch.hvm_vcpu.mtrr, diff -Nru xen-4.6.0/xen/arch/x86/hvm/save.c xen-4.6.5/xen/arch/x86/hvm/save.c --- xen-4.6.0/xen/arch/x86/hvm/save.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/save.c 2017-03-07 16:19:05.000000000 +0000 @@ -73,7 +73,7 @@ d->arch.hvm_domain.sync_tsc = rdtsc(); /* VGA state is not saved/restored, so we nobble the cache. 
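The rewritten overlap test in is_var_mtrr_overlapped() above is worth unpacking: a variable-range MTRR with a valid mask matches exactly the set { addr : (addr & mask) == (base & mask) }. Two such sets can both contain some address iff their constraints agree on the bits covered by both masks; bits covered by only one mask are free to choose, and bits covered by neither are unconstrained. Hence the single comparison:

    /*
     * Ranges 1 and 2 (page-frame granularity) overlap iff their bases
     * agree wherever both masks constrain the address.
     */
    bool overlaps = (base1 & mask1 & mask2) == (base2 & mask2 & mask1);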
*/ - d->arch.hvm_domain.stdvga.cache = 0; + d->arch.hvm_domain.stdvga.cache = STDVGA_CACHE_DISABLED; return 0; } diff -Nru xen-4.6.0/xen/arch/x86/hvm/stdvga.c xen-4.6.5/xen/arch/x86/hvm/stdvga.c --- xen-4.6.0/xen/arch/x86/hvm/stdvga.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/stdvga.c 2017-03-07 16:19:05.000000000 +0000 @@ -101,6 +101,37 @@ unmap_domain_page(p); } +static void stdvga_try_cache_enable(struct hvm_hw_stdvga *s) +{ + /* + * Caching mode can only be enabled if the the cache has + * never been used before. As soon as it is disabled, it will + * become out-of-sync with the VGA device model and since no + * mechanism exists to acquire current VRAM state from the + * device model, re-enabling it would lead to stale data being + * seen by the guest. + */ + if ( s->cache != STDVGA_CACHE_UNINITIALIZED ) + return; + + gdprintk(XENLOG_INFO, "entering caching mode\n"); + s->cache = STDVGA_CACHE_ENABLED; +} + +static void stdvga_cache_disable(struct hvm_hw_stdvga *s) +{ + if ( s->cache != STDVGA_CACHE_ENABLED ) + return; + + gdprintk(XENLOG_INFO, "leaving caching mode\n"); + s->cache = STDVGA_CACHE_DISABLED; +} + +static bool_t stdvga_cache_is_enabled(const struct hvm_hw_stdvga *s) +{ + return s->cache == STDVGA_CACHE_ENABLED; +} + static int stdvga_outb(uint64_t addr, uint8_t val) { struct hvm_hw_stdvga *s = ¤t->domain->arch.hvm_domain.stdvga; @@ -139,12 +170,8 @@ if ( !prev_stdvga && s->stdvga ) { - /* - * (Re)start caching of video buffer. - * XXX TODO: In case of a restart the cache could be unsynced. - */ - s->cache = 1; - gdprintk(XENLOG_INFO, "entering stdvga and caching modes\n"); + gdprintk(XENLOG_INFO, "entering stdvga mode\n"); + stdvga_try_cache_enable(s); } else if ( prev_stdvga && !s->stdvga ) { @@ -441,7 +468,7 @@ }; struct hvm_ioreq_server *srv; - if ( !s->cache || !s->stdvga ) + if ( !stdvga_cache_is_enabled(s) || !s->stdvga ) goto done; /* Intercept mmio write */ @@ -515,15 +542,12 @@ * not active since we can assert, when in stdvga mode, that writes * to VRAM have no side effect and thus we can try to buffer them. */ - if ( s->cache ) - { - gdprintk(XENLOG_INFO, "leaving caching mode\n"); - s->cache = 0; - } + stdvga_cache_disable(s); goto reject; } - else if ( p->dir == IOREQ_READ && (!s->cache || !s->stdvga) ) + else if ( p->dir == IOREQ_READ && + (!stdvga_cache_is_enabled(s) || !s->stdvga) ) goto reject; /* s->lock intentionally held */ diff -Nru xen-4.6.0/xen/arch/x86/hvm/svm/emulate.c xen-4.6.5/xen/arch/x86/hvm/svm/emulate.c --- xen-4.6.0/xen/arch/x86/hvm/svm/emulate.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/svm/emulate.c 2017-03-07 16:19:05.000000000 +0000 @@ -60,18 +60,18 @@ { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - if ( !cpu_has_svm_nrips || (vmcb->nextrip <= vmcb->rip) ) + if ( !cpu_has_svm_nrips ) return 0; #ifndef NDEBUG switch ( vmcb->exitcode ) { - case VMEXIT_CR0_READ... VMEXIT_DR15_WRITE: + case VMEXIT_CR0_READ ... VMEXIT_DR15_WRITE: /* faults due to instruction intercepts */ /* (exitcodes 84-95) are reserved */ case VMEXIT_IDTR_READ ... VMEXIT_TR_WRITE: case VMEXIT_RDTSC ... VMEXIT_MSR: - case VMEXIT_VMRUN ... VMEXIT_XSETBV: + case VMEXIT_VMRUN ... 
VMEXIT_XSETBV: /* ...and the rest of the #VMEXITs */ case VMEXIT_CR0_SEL_WRITE: case VMEXIT_EXCEPTION_BP: @@ -152,14 +152,16 @@ const enum instruction_index *list, unsigned int list_count) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned int i, j, inst_len = 0; + unsigned int i, j; enum instruction_index instr = 0; u8 buf[MAX_INST_LEN]; const u8 *opcode = NULL; - unsigned long fetch_addr; + unsigned long fetch_addr, inst_len; unsigned int fetch_len; - if ( (inst_len = svm_nextrip_insn_length(v)) != 0 ) + if ( (inst_len = svm_nextrip_insn_length(v)) > MAX_INST_LEN ) + gprintk(XENLOG_WARNING, "NRip reported inst_len %lu\n", inst_len); + else if ( inst_len != 0 ) return inst_len; if ( vmcb->exitcode == VMEXIT_IOIO ) @@ -173,7 +175,7 @@ if ( !fetch(v, buf, fetch_addr, fetch_len) ) return 0; - while ( (inst_len < MAX_INST_LEN) && is_prefix(buf[inst_len]) ) + for ( inst_len = 0; (inst_len < MAX_INST_LEN) && is_prefix(buf[inst_len]); ) { inst_len++; if ( inst_len >= fetch_len ) diff -Nru xen-4.6.0/xen/arch/x86/hvm/svm/svm.c xen-4.6.5/xen/arch/x86/hvm/svm/svm.c --- xen-4.6.0/xen/arch/x86/hvm/svm/svm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/svm/svm.c 2017-03-07 16:19:05.000000000 +0000 @@ -620,6 +620,7 @@ { case x86_seg_cs: memcpy(reg, &vmcb->cs, sizeof(*reg)); + reg->attr.fields.p = 1; reg->attr.fields.g = reg->limit > 0xFFFFF; break; case x86_seg_ds: @@ -653,13 +654,16 @@ case x86_seg_tr: svm_sync_vmcb(v); memcpy(reg, &vmcb->tr, sizeof(*reg)); + reg->attr.fields.p = 1; reg->attr.fields.type |= 0x2; break; case x86_seg_gdtr: memcpy(reg, &vmcb->gdtr, sizeof(*reg)); + reg->attr.bytes = 0x80; break; case x86_seg_idtr: memcpy(reg, &vmcb->idtr, sizeof(*reg)); + reg->attr.bytes = 0x80; break; case x86_seg_ldtr: svm_sync_vmcb(v); @@ -1043,10 +1047,11 @@ unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) ) { uint32_t intercepts = vmcb_get_exception_intercepts(vmcb); - uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3); + v->arch.hvm_vcpu.debug_state_latch = debug_state; vmcb_set_exception_intercepts( - vmcb, debug_state ? (intercepts | mask) : (intercepts & ~mask)); + vmcb, debug_state ? (intercepts | (1U << TRAP_int3)) + : (intercepts & ~(1U << TRAP_int3))); } if ( v->arch.hvm_svm.launch_core != smp_processor_id() ) @@ -1224,17 +1229,14 @@ { case X86_EVENTTYPE_SW_INTERRUPT: /* int $n */ /* - * Injection type 4 (software interrupt) is only supported with - * NextRIP support. Without NextRIP, the emulator will have performed - * DPL and presence checks for us. + * Software interrupts (type 4) cannot be properly injected if the + * processor doesn't support NextRIP. Without NextRIP, the emulator + * will have performed DPL and presence checks for us, and will have + * moved eip forward if appropriate. 
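To expand on that comment: when SVM injects a type-4 (software interrupt) event on NRIPs-capable hardware, the return address pushed on the guest stack is taken from the VMCB NextRIP field, so it must point at the instruction following the INT n; without NextRIP that value is unavailable, which is why the fallback path lets the emulator advance eip and perform the DPL/presence checks itself. Sketch of the NRIPs requirement, with insn_len as the decoded INT n length:

    /*
     * Hardware pushes eflags, cs, and NextRIP (not the current rip)
     * for a type-4 injection, so (sketch):
     */
    vmcb->nextrip = regs->eip + insn_len;   /* guest resumes after INT n */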
*/ if ( cpu_has_svm_nrips ) - { vmcb->nextrip = regs->eip + _trap.insn_len; - event.fields.type = X86_EVENTTYPE_SW_INTERRUPT; - } - else - event.fields.type = X86_EVENTTYPE_HW_EXCEPTION; + event.fields.type = X86_EVENTTYPE_SW_INTERRUPT; break; case X86_EVENTTYPE_PRI_SW_EXCEPTION: /* icebp */ @@ -1466,7 +1468,8 @@ struct npfec npfec = { .read_access = !(pfec & PFEC_insn_fetch), .write_access = !!(pfec & PFEC_write_access), - .insn_fetch = !!(pfec & PFEC_insn_fetch) + .insn_fetch = !!(pfec & PFEC_insn_fetch), + .present = !!(pfec & PFEC_page_present), }; /* These bits are mutually exclusive */ @@ -1930,26 +1933,28 @@ static void svm_do_msr_access(struct cpu_user_regs *regs) { - int rc, inst_len; - struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - uint64_t msr_content; + struct vcpu *curr = current; + bool_t rdmsr = !curr->arch.hvm_svm.vmcb->exitinfo1; + int rc, inst_len = __get_instruction_length( + curr, rdmsr ? INSTR_RDMSR : INSTR_WRMSR); + + if ( inst_len == 0 ) + return; - if ( vmcb->exitinfo1 == 0 ) + if ( rdmsr ) { - if ( (inst_len = __get_instruction_length(v, INSTR_RDMSR)) == 0 ) - return; - rc = hvm_msr_read_intercept(regs->ecx, &msr_content); - regs->eax = (uint32_t)msr_content; - regs->edx = (uint32_t)(msr_content >> 32); + uint64_t msr_content = 0; + + rc = hvm_msr_read_intercept(regs->_ecx, &msr_content); + if ( rc == X86EMUL_OKAY ) + { + regs->rax = (uint32_t)msr_content; + regs->rdx = (uint32_t)(msr_content >> 32); + } } else - { - if ( (inst_len = __get_instruction_length(v, INSTR_WRMSR)) == 0 ) - return; - msr_content = ((uint64_t)regs->edx << 32) | (uint32_t)regs->eax; - rc = hvm_msr_write_intercept(regs->ecx, msr_content, 1); - } + rc = hvm_msr_write_intercept(regs->_ecx, + (regs->rdx << 32) | regs->_eax, 1); if ( rc == X86EMUL_OKAY ) __update_guest_eip(regs, inst_len); @@ -2008,7 +2013,7 @@ if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) { gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #GP\n"); - hvm_inject_hw_exception(TRAP_gp_fault, HVM_DELIVER_NO_ERROR_CODE); + hvm_inject_hw_exception(TRAP_gp_fault, 0); return; } @@ -2047,7 +2052,6 @@ struct cpu_user_regs *regs, struct vcpu *v, uint64_t vmcbaddr) { - int ret; unsigned int inst_len; struct page_info *page; @@ -2057,8 +2061,8 @@ if ( !nsvm_efer_svm_enabled(v) ) { gdprintk(XENLOG_ERR, "VMLOAD: nestedhvm disabled, injecting #UD\n"); - ret = TRAP_invalid_op; - goto inject; + hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); + return; } page = nsvm_get_nvmcb_page(v, vmcbaddr); @@ -2066,8 +2070,8 @@ { gdprintk(XENLOG_ERR, "VMLOAD: mapping failed, injecting #GP\n"); - ret = TRAP_gp_fault; - goto inject; + hvm_inject_hw_exception(TRAP_gp_fault, 0); + return; } svm_vmload_pa(page_to_maddr(page)); @@ -2077,11 +2081,6 @@ v->arch.hvm_svm.vmcb_in_sync = 0; __update_guest_eip(regs, inst_len); - return; - - inject: - hvm_inject_hw_exception(ret, HVM_DELIVER_NO_ERROR_CODE); - return; } static void @@ -2089,7 +2088,6 @@ struct cpu_user_regs *regs, struct vcpu *v, uint64_t vmcbaddr) { - int ret; unsigned int inst_len; struct page_info *page; @@ -2099,8 +2097,8 @@ if ( !nsvm_efer_svm_enabled(v) ) { gdprintk(XENLOG_ERR, "VMSAVE: nestedhvm disabled, injecting #UD\n"); - ret = TRAP_invalid_op; - goto inject; + hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); + return; } page = nsvm_get_nvmcb_page(v, vmcbaddr); @@ -2108,18 +2106,13 @@ { gdprintk(XENLOG_ERR, "VMSAVE: mapping vmcb failed, injecting #GP\n"); - ret = TRAP_gp_fault; - goto inject; + 
hvm_inject_hw_exception(TRAP_gp_fault, 0); + return; } svm_vmsave_pa(page_to_maddr(page)); put_page(page); __update_guest_eip(regs, inst_len); - return; - - inject: - hvm_inject_hw_exception(ret, HVM_DELIVER_NO_ERROR_CODE); - return; } static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs) @@ -2137,6 +2130,10 @@ { regs->eip += sizeof(sig); regs->eflags &= ~X86_EFLAGS_RF; + + /* Zero the upper 32 bits of %rip if not in long mode. */ + if ( svm_guest_x86_mode(current) != 8 ) + regs->eip = regs->_eip; } } @@ -2434,8 +2431,9 @@ case VMEXIT_EXCEPTION_DB: if ( !v->domain->debugger_attached ) - goto unexpected_exit_type; - domain_pause_for_debugger(); + hvm_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE); + else + domain_pause_for_debugger(); break; case VMEXIT_EXCEPTION_BP: @@ -2483,6 +2481,11 @@ break; } + case VMEXIT_EXCEPTION_AC: + HVMTRACE_1D(TRAP, TRAP_alignment_check); + hvm_inject_hw_exception(TRAP_alignment_check, vmcb->exitinfo1); + break; + case VMEXIT_EXCEPTION_UD: svm_vmexit_ud_intercept(regs); break; diff -Nru xen-4.6.0/xen/arch/x86/hvm/svm/svmdebug.c xen-4.6.5/xen/arch/x86/hvm/svm/svmdebug.c --- xen-4.6.0/xen/arch/x86/hvm/svm/svmdebug.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/svm/svmdebug.c 2017-03-07 16:19:05.000000000 +0000 @@ -48,6 +48,10 @@ vmcb->tlb_control, (unsigned long long)vmcb->_vintr.bytes, (unsigned long long)vmcb->interrupt_shadow); + printk("eventinj %016"PRIx64", valid? %d, ec? %d, type %u, vector %#x\n", + vmcb->eventinj.bytes, vmcb->eventinj.fields.v, + vmcb->eventinj.fields.ev, vmcb->eventinj.fields.type, + vmcb->eventinj.fields.vector); printk("exitcode = %#Lx exitintinfo = %#Lx\n", (unsigned long long)vmcb->exitcode, (unsigned long long)vmcb->exitintinfo.bytes); diff -Nru xen-4.6.0/xen/arch/x86/hvm/viridian.c xen-4.6.5/xen/arch/x86/hvm/viridian.c --- xen-4.6.0/xen/arch/x86/hvm/viridian.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/viridian.c 2017-03-07 16:19:05.000000000 +0000 @@ -550,9 +550,9 @@ output_params_gpa = regs->r8; break; case 4: - input.raw = ((uint64_t)regs->edx << 32) | regs->eax; - input_params_gpa = ((uint64_t)regs->ebx << 32) | regs->ecx; - output_params_gpa = ((uint64_t)regs->edi << 32) | regs->esi; + input.raw = (regs->rdx << 32) | regs->_eax; + input_params_gpa = (regs->rbx << 32) | regs->_ecx; + output_params_gpa = (regs->rdi << 32) | regs->_esi; break; default: goto out; @@ -577,8 +577,8 @@ regs->rax = output.raw; break; default: - regs->edx = output.raw >> 32; - regs->eax = output.raw; + regs->rdx = output.raw >> 32; + regs->rax = (uint32_t)output.raw; break; } diff -Nru xen-4.6.0/xen/arch/x86/hvm/vmsi.c xen-4.6.5/xen/arch/x86/hvm/vmsi.c --- xen-4.6.0/xen/arch/x86/hvm/vmsi.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/vmsi.c 2017-03-07 16:19:05.000000000 +0000 @@ -295,6 +295,7 @@ if ( len != 8 || !index ) goto out; val >>= 32; + address += 4; } /* Exit to device model when unmasking and address/data got modified. 
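Both the RDMSR/WRMSR rework and the Viridian 32-bit hypercall path above hinge on the same pattern: a 64-bit value travels in a pair of 32-bit register halves, and a 64-bit register written with a 32-bit result must be zero-extended rather than left stale. A minimal sketch of the split/recombine helpers (plain C, not hypervisor code):

```c
#include <stdint.h>
#include <stdio.h>

/* Split a 64-bit value into the EDX:EAX pair RDMSR-style interfaces use. */
static void split64(uint64_t val, uint32_t *eax, uint32_t *edx)
{
    *eax = (uint32_t)val;          /* low half */
    *edx = (uint32_t)(val >> 32);  /* high half */
}

/* Recombine EDX:EAX into the 64-bit value WRMSR-style interfaces take. */
static uint64_t combine64(uint32_t eax, uint32_t edx)
{
    return ((uint64_t)edx << 32) | eax;
}

int main(void)
{
    uint32_t eax, edx;

    split64(0x123456789abcdef0ULL, &eax, &edx);
    printf("edx=%08x eax=%08x -> %016llx\n", edx, eax,
           (unsigned long long)combine64(eax, edx));
    return 0;
}
```

Casting through uint32_t before assigning, as the patch does with regs->rax, is what guarantees the upper half of the 64-bit register is cleared, matching what the hardware itself does for 32-bit results in 64-bit mode.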
*/ @@ -335,12 +336,64 @@ static int msixtbl_range(struct vcpu *v, unsigned long addr) { const struct msi_desc *desc; + const ioreq_t *r; rcu_read_lock(&msixtbl_rcu_lock); desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, addr), addr); rcu_read_unlock(&msixtbl_rcu_lock); - return !!desc; + if ( desc ) + return 1; + + r = &v->arch.hvm_vcpu.hvm_io.io_req; + if ( r->state != STATE_IOREQ_READY || r->addr != addr ) + return 0; + ASSERT(r->type == IOREQ_TYPE_COPY); + if ( r->dir == IOREQ_WRITE ) + { + unsigned int size = r->size; + + if ( !r->data_is_ptr ) + { + uint64_t data = r->data; + + if ( size == 8 ) + { + BUILD_BUG_ON(!(PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET & 4)); + data >>= 32; + addr += size = 4; + } + if ( size == 4 && + ((addr & (PCI_MSIX_ENTRY_SIZE - 1)) == + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) && + !(data & PCI_MSIX_VECTOR_BITMASK) ) + { + v->arch.hvm_vcpu.hvm_io.msix_snoop_address = addr; + v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa = 0; + } + } + else if ( (size == 4 || size == 8) && + /* Only support forward REP MOVS for now. */ + !r->df && + /* + * Only fully support accesses to a single table entry for + * now (if multiple ones get written to in one go, only the + * final one gets dealt with). + */ + r->count && r->count <= PCI_MSIX_ENTRY_SIZE / size && + !((addr + (size * r->count)) & (PCI_MSIX_ENTRY_SIZE - 1)) ) + { + BUILD_BUG_ON((PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET + 4) & + (PCI_MSIX_ENTRY_SIZE - 1)); + + v->arch.hvm_vcpu.hvm_io.msix_snoop_address = + addr + size * r->count - 4; + v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa = + r->data + size * r->count - 4; + } + } + + return 0; } static const struct hvm_mmio_ops msixtbl_mmio_ops = { @@ -406,9 +459,6 @@ return r; } - if ( !irq_desc->msi_desc ) - goto out; - msi_desc = irq_desc->msi_desc; if ( !msi_desc ) goto out; @@ -433,6 +483,24 @@ out: spin_unlock_irq(&irq_desc->lock); xfree(new_entry); + + if ( !r ) + { + struct vcpu *v; + + for_each_vcpu ( d, v ) + { + if ( (v->pause_flags & VPF_blocked_in_xen) && + !v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa && + v->arch.hvm_vcpu.hvm_io.msix_snoop_address == + (gtable + msi_desc->msi_attrib.entry_nr * + PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) ) + v->arch.hvm_vcpu.hvm_io.msix_unmask_address = + v->arch.hvm_vcpu.hvm_io.msix_snoop_address; + } + } + return r; } @@ -450,9 +518,6 @@ if ( !irq_desc ) return; - if ( !irq_desc->msi_desc ) - goto out; - msi_desc = irq_desc->msi_desc; if ( !msi_desc ) goto out; @@ -508,6 +573,28 @@ void msix_write_completion(struct vcpu *v) { unsigned long ctrl_address = v->arch.hvm_vcpu.hvm_io.msix_unmask_address; + unsigned long snoop_addr = v->arch.hvm_vcpu.hvm_io.msix_snoop_address; + + v->arch.hvm_vcpu.hvm_io.msix_snoop_address = 0; + + if ( !ctrl_address && snoop_addr && + v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa ) + { + const struct msi_desc *desc; + uint32_t data; + + rcu_read_lock(&msixtbl_rcu_lock); + desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr), + snoop_addr); + rcu_read_unlock(&msixtbl_rcu_lock); + + if ( desc && + hvm_copy_from_guest_phys(&data, + v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa, + sizeof(data)) == HVMCOPY_okay && + !(data & PCI_MSIX_VECTOR_BITMASK) ) + ctrl_address = snoop_addr; + } if ( !ctrl_address ) return; diff -Nru xen-4.6.0/xen/arch/x86/hvm/vmx/entry.S xen-4.6.5/xen/arch/x86/hvm/vmx/entry.S --- xen-4.6.0/xen/arch/x86/hvm/vmx/entry.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/vmx/entry.S 2017-03-07 16:19:05.000000000 +0000 @@ -101,14 +101,15 @@ /*.Lvmx_resume:*/ VMRESUME - sti - call 
vm_resume_fail - ud2 + jmp .Lvmx_vmentry_fail .Lvmx_launch: VMLAUNCH + +.Lvmx_vmentry_fail: sti - call vm_launch_fail + SAVE_ALL + call vmx_vmentry_failure ud2 ENTRY(vmx_asm_do_vmentry) diff -Nru xen-4.6.0/xen/arch/x86/hvm/vmx/vmcs.c xen-4.6.5/xen/arch/x86/hvm/vmx/vmcs.c --- xen-4.6.0/xen/arch/x86/hvm/vmx/vmcs.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/vmx/vmcs.c 2017-03-07 16:19:05.000000000 +0000 @@ -556,6 +556,20 @@ local_irq_restore(flags); } +void vmx_vmcs_reload(struct vcpu *v) +{ + /* + * As we may be running with interrupts disabled, we can't acquire + * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled + * the VMCS can't be taken away from us anymore if we still own it. + */ + ASSERT(v->is_running || !local_irq_is_enabled()); + if ( v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs) ) + return; + + vmx_load_vmcs(v); +} + int vmx_cpu_up_prepare(unsigned int cpu) { /* @@ -1249,6 +1263,8 @@ __vmwrite(HOST_PAT, host_pat); __vmwrite(GUEST_PAT, guest_pat); } + if ( cpu_has_vmx_mpx ) + __vmwrite(GUEST_BNDCFGS, 0); vmx_vmcs_exit(v); @@ -1588,21 +1604,19 @@ free_xenheap_page(v->arch.hvm_vmx.msr_bitmap); } -void vm_launch_fail(void) +void vmx_vmentry_failure(void) { + struct vcpu *curr = current; unsigned long error; __vmread(VM_INSTRUCTION_ERROR, &error); - printk(" error code %lx\n", error); - domain_crash_synchronous(); -} + gprintk(XENLOG_ERR, "VM%s error: %#lx\n", + curr->arch.hvm_vmx.launched ? "RESUME" : "LAUNCH", error); -void vm_resume_fail(void) -{ - unsigned long error; + if ( error == VMX_INSN_INVALID_CONTROL_STATE || + error == VMX_INSN_INVALID_HOST_STATE ) + vmcs_dump_vcpu(curr); - __vmread(VM_INSTRUCTION_ERROR, &error); - printk(" error code %lx\n", error); domain_crash_synchronous(); } @@ -1611,10 +1625,7 @@ bool_t debug_state; if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() ) - { - if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) ) - vmx_load_vmcs(v); - } + vmx_vmcs_reload(v); else { /* diff -Nru xen-4.6.0/xen/arch/x86/hvm/vmx/vmx.c xen-4.6.5/xen/arch/x86/hvm/vmx/vmx.c --- xen-4.6.0/xen/arch/x86/hvm/vmx/vmx.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/vmx/vmx.c 2017-03-07 16:19:05.000000000 +0000 @@ -383,10 +383,13 @@ void vmx_update_exception_bitmap(struct vcpu *v) { + u32 bitmap = unlikely(v->arch.hvm_vmx.vmx_realmode) + ? 
0xffffffffu : v->arch.hvm_vmx.exception_bitmap; + if ( nestedhvm_vcpu_in_guestmode(v) ) - nvmx_update_exception_bitmap(v, v->arch.hvm_vmx.exception_bitmap); + nvmx_update_exception_bitmap(v, bitmap); else - __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap); + __vmwrite(EXCEPTION_BITMAP, bitmap); } static int vmx_guest_x86_mode(struct vcpu *v) @@ -625,14 +628,14 @@ static unsigned int __init vmx_init_msr(void) { - return !!cpu_has_mpx; + return cpu_has_mpx && cpu_has_vmx_mpx; } static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) { vmx_vmcs_enter(v); - if ( cpu_has_mpx ) + if ( cpu_has_mpx && cpu_has_vmx_mpx ) { __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); if ( ctxt->msr[ctxt->count].val ) @@ -654,7 +657,9 @@ switch ( ctxt->msr[i].index ) { case MSR_IA32_BNDCFGS: - if ( cpu_has_mpx ) + if ( cpu_has_mpx && cpu_has_vmx_mpx && + is_canonical_address(ctxt->msr[i].val) && + !(ctxt->msr[i].val & IA32_BNDCFGS_RESERVED) ) __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val); else err = -ENXIO; @@ -717,6 +722,18 @@ if ( unlikely(!this_cpu(vmxon)) ) return; + if ( !v->is_running ) + { + /* + * When this vCPU isn't marked as running anymore, a remote pCPU's + * attempt to pause us (from vmx_vmcs_enter()) won't have a reason + * to spin in vcpu_sleep_sync(), and hence that pCPU might have taken + * away the VMCS from us. As we're running with interrupts disabled, + * we also can't call vmx_vmcs_enter(). + */ + vmx_vmcs_reload(v); + } + vmx_fpu_leave(v); vmx_save_guest_msrs(v); vmx_restore_host_msrs(); @@ -862,10 +879,12 @@ reg->sel = sel; reg->limit = limit; - reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00); - /* Unusable flag is folded into Present flag. */ - if ( attr & (1u<<16) ) - reg->attr.fields.p = 0; + /* + * Fold VT-x representation into Xen's representation. The Present bit is + * unconditionally set to the inverse of unusable. + */ + reg->attr.bytes = + (!(attr & (1u << 16)) << 7) | (attr & 0x7f) | ((attr >> 4) & 0xf00); /* Adjust for virtual 8086 mode */ if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr @@ -945,11 +964,11 @@ } } - attr = ((attr & 0xf00) << 4) | (attr & 0xff); - - /* Not-present must mean unusable. */ - if ( !reg->attr.fields.p ) - attr |= (1u << 16); + /* + * Unfold Xen representation into VT-x representation. The unusable bit + * is unconditionally set to the inverse of present. + */ + attr = (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff); /* VMX has strict consistency requirement for flag G. */ attr |= !!(limit >> 20) << 15; @@ -1224,16 +1243,10 @@ void vmx_update_debug_state(struct vcpu *v) { - unsigned long mask; - - mask = 1u << TRAP_int3; - if ( !cpu_has_monitor_trap_flag ) - mask |= 1u << TRAP_debug; - if ( v->arch.hvm_vcpu.debug_state_latch ) - v->arch.hvm_vmx.exception_bitmap |= mask; + v->arch.hvm_vmx.exception_bitmap |= 1U << TRAP_int3; else - v->arch.hvm_vmx.exception_bitmap &= ~mask; + v->arch.hvm_vmx.exception_bitmap &= ~(1U << TRAP_int3); vmx_vmcs_enter(v); vmx_update_exception_bitmap(v); @@ -1294,33 +1307,29 @@ enum x86_segment s; struct segment_register reg[x86_seg_tr + 1]; + BUILD_BUG_ON(x86_seg_tr != x86_seg_gs + 1); + /* Entering or leaving real mode: adjust the segment registers. * Need to read them all either way, as realmode reads can update * the saved values we'll use when returning to prot mode. 
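The fold/unfold hunks above convert between the VT-x access-rights layout (attribute bits 0-7 and 12-14, plus an "unusable" flag at bit 16) and Xen's packed 12-bit segment attribute form, deriving the Present bit purely from the inverse of "unusable". The two expressions, lifted verbatim, can be checked for round-trip consistency in isolation:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* VT-x -> Xen: bit 16 is "unusable"; Present (bit 7) becomes its inverse. */
static uint16_t fold_attr(uint32_t ar)
{
    return (!(ar & (1u << 16)) << 7) | (ar & 0x7f) | ((ar >> 4) & 0xf00);
}

/* Xen -> VT-x: "unusable" becomes the inverse of Present. */
static uint32_t unfold_attr(uint16_t attr)
{
    return (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff);
}

int main(void)
{
    uint32_t ar = 0xa09b; /* present 64-bit code segment, G and L set */
    uint16_t attr = fold_attr(ar);

    assert(unfold_attr(attr) == ar); /* round trip is lossless here */
    printf("ar=%#x -> attr=%#x\n", ar, attr);
    return 0;
}
```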
*/ - for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ ) + for ( s = 0; s < ARRAY_SIZE(reg); s++ ) vmx_get_segment_register(v, s, &reg[s]); v->arch.hvm_vmx.vmx_realmode = realmode; if ( realmode ) { - for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ ) + for ( s = 0; s < ARRAY_SIZE(reg); s++ ) vmx_set_segment_register(v, s, &reg[s]); - v->arch.hvm_vmx.exception_bitmap = 0xffffffff; - vmx_update_exception_bitmap(v); } else { - for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ ) + for ( s = 0; s < ARRAY_SIZE(reg); s++ ) if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) ) vmx_set_segment_register( v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]); - v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK | (paging_mode_hap(v->domain) ? 0 : (1U << TRAP_page_fault)) | (1U << TRAP_no_device); - vmx_update_exception_bitmap(v); - vmx_update_debug_state(v); } + + vmx_update_exception_bitmap(v); } v->arch.hvm_vcpu.hw_cr[0] = @@ -1514,16 +1523,7 @@ struct vcpu *curr = current; struct hvm_trap _trap = *trap; - if ( (_trap.vector == TRAP_page_fault) && (_trap.type == X86_EVENTTYPE_HW_EXCEPTION) ) curr->arch.hvm_vcpu.guest_cr[2] = _trap.cr2; - - if ( nestedhvm_vcpu_in_guestmode(curr) ) intr_info = vcpu_2_nvmx(curr).intr.intr_info; - else - __vmread(VM_ENTRY_INTR_INFO, &intr_info); - - switch ( _trap.vector ) + switch ( _trap.vector | -(_trap.type == X86_EVENTTYPE_SW_INTERRUPT) ) { case TRAP_debug: if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF ) @@ -1531,6 +1531,16 @@ __restore_debug_registers(curr); write_debugreg(6, read_debugreg(6) | DR_STEP); } + if ( !nestedhvm_vcpu_in_guestmode(curr) || + !nvmx_intercepts_exception(curr, TRAP_debug, _trap.error_code) ) + { + unsigned long val; + + __vmread(GUEST_DR7, &val); + __vmwrite(GUEST_DR7, val & ~DR_GENERAL_DETECT); + __vmread(GUEST_IA32_DEBUGCTL, &val); + __vmwrite(GUEST_IA32_DEBUGCTL, val & ~IA32_DEBUGCTLMSR_LBR); + } if ( cpu_has_monitor_trap_flag ) break; /* fall through */ @@ -1541,8 +1551,19 @@ domain_pause_for_debugger(); return; } + break; + + case TRAP_page_fault: + ASSERT(_trap.type == X86_EVENTTYPE_HW_EXCEPTION); + curr->arch.hvm_vcpu.guest_cr[2] = _trap.cr2; + break; } + if ( nestedhvm_vcpu_in_guestmode(curr) ) + intr_info = vcpu_2_nvmx(curr).intr.intr_info; + else + __vmread(VM_ENTRY_INTR_INFO, &intr_info); + if ( unlikely(intr_info & INTR_INFO_VALID_MASK) && (MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK) == X86_EVENTTYPE_HW_EXCEPTION) ) @@ -2057,7 +2078,6 @@ unsigned int *ecx, unsigned int *edx) { unsigned int input = *eax; - struct segment_register cs; struct vcpu *v = current; hvm_cpuid(input, eax, ebx, ecx, edx); @@ -2066,8 +2086,7 @@ { case 0x80000001: /* SYSCALL is visible iff running in long mode.
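The rewritten switch above uses a small trick worth spelling out: OR-ing the vector with -(condition) forces the switch value to all-ones whenever the condition holds, so no case label can match and software interrupts bypass the #DB/#PF special cases entirely. A self-contained demonstration (plain C, hypothetical labels):

```c
#include <stdio.h>

/* When is_sw_int is 1, -is_sw_int is all-ones, so the switch value
 * matches no case label and control reaches default; when it is 0 the
 * vector passes through unchanged. */
static const char *classify(unsigned int vector, unsigned int is_sw_int)
{
    switch ( vector | -is_sw_int )
    {
    case 1:  return "#DB special handling";
    case 14: return "#PF special handling";
    default: return "no special handling";
    }
}

int main(void)
{
    printf("hw #DB : %s\n", classify(1, 0));
    printf("int $1 : %s\n", classify(1, 1));
    return 0;
}
```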
*/ - vmx_get_segment_register(v, x86_seg_cs, &cs); - if ( cs.attr.fields.l ) + if ( hvm_long_mode_enabled(v) ) *edx |= cpufeat_mask(X86_FEATURE_SYSCALL); else *edx &= ~(cpufeat_mask(X86_FEATURE_SYSCALL)); @@ -2301,8 +2320,13 @@ case MSR_IA32_DEBUGCTLMSR: __vmread(GUEST_IA32_DEBUGCTL, msr_content); break; + case MSR_IA32_BNDCFGS: + if ( !cpu_has_mpx || !cpu_has_vmx_mpx ) + goto gp_fault; + __vmread(GUEST_BNDCFGS, msr_content); + break; case IA32_FEATURE_CONTROL_MSR: - case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS: + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC: if ( !nvmx_msr_read_intercept(msr, msr_content) ) goto gp_fault; break; @@ -2322,6 +2346,13 @@ if ( vpmu_do_rdmsr(msr, msr_content) ) goto gp_fault; break; + + case MSR_INTEL_PLATFORM_INFO: + if ( rdmsr_safe(MSR_INTEL_PLATFORM_INFO, *msr_content) ) + goto gp_fault; + *msr_content = 0; + break; + default: if ( passive_domain_do_rdmsr(msr, msr_content) ) goto done; @@ -2515,12 +2546,19 @@ if ( (rc < 0) || (msr_content && (vmx_add_host_load_msr(msr) < 0)) ) - hvm_inject_hw_exception(TRAP_machine_check, 0); + hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE); else __vmwrite(GUEST_IA32_DEBUGCTL, msr_content); break; } + case MSR_IA32_BNDCFGS: + if ( !cpu_has_mpx || !cpu_has_vmx_mpx || + !is_canonical_address(msr_content) || + (msr_content & IA32_BNDCFGS_RESERVED) ) + goto gp_fault; + __vmwrite(GUEST_BNDCFGS, msr_content); + break; case IA32_FEATURE_CONTROL_MSR: case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS: if ( !nvmx_msr_write_intercept(msr, msr_content) ) @@ -2535,6 +2573,13 @@ if ( vpmu_do_wrmsr(msr, msr_content, 0) ) goto gp_fault; break; + + case MSR_INTEL_PLATFORM_INFO: + if ( msr_content || + rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) ) + goto gp_fault; + break; + default: if ( passive_domain_do_wrmsr(msr, msr_content) ) return X86EMUL_OKAY; @@ -2621,7 +2666,10 @@ .read_access = !!(qualification & EPT_READ_VIOLATION) || !!(qualification & EPT_WRITE_VIOLATION), .write_access = !!(qualification & EPT_WRITE_VIOLATION), - .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION) + .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION), + .present = !!(qualification & (EPT_EFFECTIVE_READ | + EPT_EFFECTIVE_WRITE | + EPT_EFFECTIVE_EXEC)) }; if ( tb_init_done ) @@ -2751,6 +2799,10 @@ { regs->eip += sizeof(sig); regs->eflags &= ~X86_EFLAGS_RF; + + /* Zero the upper 32 bits of %rip if not in long mode. */ + if ( vmx_guest_x86_mode(current) != 8 ) + regs->eip = regs->_eip; } } @@ -2794,6 +2846,40 @@ return 0; } +/* + * Propagate VM_EXIT_INTR_INFO to VM_ENTRY_INTR_INFO. Used to mirror an + * intercepted exception back to the guest as if Xen hadn't intercepted it. + * + * It is the callers responsibility to ensure that this function is only used + * in the context of an appropriate vmexit. 
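The vmx_propagate_intr() helper introduced here (its body follows below) decomposes the VM-exit interruption field using MASK_EXTR(), which extracts a bitfield given nothing but its mask. A standalone illustration using Xen's macro definition, with field constants following the interruption-information layout in the SDM:

```c
#include <stdint.h>
#include <stdio.h>

/* Xen's mask-extract: dividing by the mask's lowest set bit shifts the
 * field down without naming its bit position explicitly. */
#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))

#define INTR_INFO_VECTOR_MASK       0x000000ffu
#define INTR_INFO_INTR_TYPE_MASK    0x00000700u
#define INTR_INFO_DELIVER_CODE_MASK 0x00000800u

int main(void)
{
    uint32_t intr_info = 0x80000b11u; /* valid HW exception 0x11 (#AC), ec */

    printf("vector %#x, type %u, error code? %u\n",
           MASK_EXTR(intr_info, INTR_INFO_VECTOR_MASK),
           MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK),
           MASK_EXTR(intr_info, INTR_INFO_DELIVER_CODE_MASK));
    return 0;
}
```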
+ */ +static void vmx_propagate_intr(unsigned long intr) +{ + struct hvm_trap trap = { + .vector = MASK_EXTR(intr, INTR_INFO_VECTOR_MASK), + .type = MASK_EXTR(intr, INTR_INFO_INTR_TYPE_MASK), + }; + unsigned long tmp; + + if ( intr & INTR_INFO_DELIVER_CODE_MASK ) + { + __vmread(VM_EXIT_INTR_ERROR_CODE, &tmp); + trap.error_code = tmp; + } + else + trap.error_code = HVM_DELIVER_NO_ERROR_CODE; + + if ( trap.type >= X86_EVENTTYPE_SW_INTERRUPT ) + { + __vmread(VM_EXIT_INSTRUCTION_LEN, &tmp); + trap.insn_len = tmp; + } + else + trap.insn_len = 0; + + hvm_inject_trap(&trap); +} + static void vmx_idtv_reinject(unsigned long idtv_info) { @@ -2846,7 +2932,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0; - unsigned int vector = 0; + unsigned int vector = 0, mode; struct vcpu *v = current; __vmread(GUEST_RIP, &regs->rip); @@ -3041,9 +3127,10 @@ __vmread(EXIT_QUALIFICATION, &exit_qualification); HVMTRACE_1D(TRAP_DEBUG, exit_qualification); write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE); - if ( !v->domain->debugger_attached || cpu_has_monitor_trap_flag ) - goto exit_and_crash; - domain_pause_for_debugger(); + if ( !v->domain->debugger_attached ) + vmx_propagate_intr(intr_info); + else + domain_pause_for_debugger(); break; case TRAP_int3: { @@ -3108,6 +3195,10 @@ hvm_inject_page_fault(regs->error_code, exit_qualification); break; + case TRAP_alignment_check: + HVMTRACE_1D(TRAP, vector); + vmx_propagate_intr(intr_info); + break; case TRAP_nmi: if ( MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK) != X86_EVENTTYPE_NMI ) @@ -3430,6 +3521,41 @@ out: if ( nestedhvm_vcpu_in_guestmode(v) ) nvmx_idtv_handling(); + + /* + * VM entry will fail (causing the guest to get crashed) if rIP (and + * rFLAGS, but we don't have an issue there) doesn't meet certain + * criteria. As we must not allow less than fully privileged mode to have + * such an effect on the domain, we correct rIP in that case (accepting + * this not being architecturally correct behavior, as the injected #GP + * fault will then not see the correct [invalid] return address). + * And since we know the guest will crash, we crash it right away if it + * already is in most privileged mode. + */ + mode = vmx_guest_x86_mode(v); + if ( mode == 8 ? !is_canonical_address(regs->rip) + : regs->rip != regs->_eip ) + { + struct segment_register ss; + + gprintk(XENLOG_WARNING, "Bad rIP %lx for mode %u\n", regs->rip, mode); + + vmx_get_segment_register(v, x86_seg_ss, &ss); + if ( ss.attr.fields.dpl ) + { + __vmread(VM_ENTRY_INTR_INFO, &intr_info); + if ( !(intr_info & INTR_INFO_VALID_MASK) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + /* Need to fix rIP nevertheless. */ + if ( mode == 8 ) + regs->rip = (long)(regs->rip << (64 - VADDR_BITS)) >> + (64 - VADDR_BITS); + else + regs->rip = regs->_eip; + } + else + domain_crash(v->domain); + } } void vmx_vmenter_helper(const struct cpu_user_regs *regs) diff -Nru xen-4.6.0/xen/arch/x86/hvm/vmx/vvmx.c xen-4.6.5/xen/arch/x86/hvm/vmx/vvmx.c --- xen-4.6.0/xen/arch/x86/hvm/vmx/vvmx.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/hvm/vmx/vvmx.c 2017-03-07 16:19:05.000000000 +0000 @@ -1869,11 +1869,22 @@ return 0; /* - * Those MSRs are available only when bit 55 of - * MSR_IA32_VMX_BASIC is set. + * These MSRs are only available when flags in other MSRs are set. + * These prerequisites are listed in the Intel 64 and IA-32 + * Architectures Software Developer’s Manual, Vol 3, Appendix A.
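The bad-rIP fixup above truncates or sign-extends rIP so that a less-privileged guest context cannot force a VM-entry failure. The sign-extension idiom it uses (shift the tag bits out the top, then let an arithmetic right shift replicate bit 47 back down) is compact enough to verify on its own; a sketch assuming 48-bit virtual addresses:

```c
#include <stdint.h>
#include <stdio.h>

#define VADDR_BITS 48 /* assumption: 48-bit virtual addresses */

/* Sign-extend from bit VADDR_BITS-1 to produce the canonical form. */
static uint64_t canonicalise(uint64_t rip)
{
    return (uint64_t)((int64_t)(rip << (64 - VADDR_BITS)) >>
                      (64 - VADDR_BITS));
}

int main(void)
{
    uint64_t bad = 0x0000800000001234ULL; /* non-canonical */

    printf("%#llx -> %#llx\n", (unsigned long long)bad,
           (unsigned long long)canonicalise(bad)); /* 0xffff800000001234 */
    return 0;
}
```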
*/ switch ( msr ) { + case MSR_IA32_VMX_PROCBASED_CTLS2: + if ( !cpu_has_vmx_secondary_exec_control ) + return 0; + break; + + case MSR_IA32_VMX_EPT_VPID_CAP: + if ( !(cpu_has_vmx_ept || cpu_has_vmx_vpid) ) + return 0; + break; + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS: @@ -1881,6 +1892,11 @@ if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) ) return 0; break; + + case MSR_IA32_VMX_VMFUNC: + if ( !cpu_has_vmx_vmfunc ) + return 0; + break; } rdmsrl(msr, host_data); @@ -2077,6 +2093,8 @@ uint32_t rwx_rights = (access_x << 2) | (access_w << 1) | access_r; struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + vmx_vmcs_enter(v); + __vmread(EXIT_QUALIFICATION, &exit_qual); rc = nept_translate_l2ga(v, L2_gpa, page_order, rwx_rights, &gfn, p2m_acc, &exit_qual, &exit_reason); @@ -2101,6 +2119,8 @@ break; } + vmx_vmcs_exit(v); + return rc; } diff -Nru xen-4.6.0/xen/arch/x86/i387.c xen-4.6.5/xen/arch/x86/i387.c --- xen-4.6.0/xen/arch/x86/i387.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/i387.c 2017-03-07 16:19:05.000000000 +0000 @@ -17,19 +17,6 @@ #include #include -static void fpu_init(void) -{ - unsigned long val; - - asm volatile ( "fninit" ); - if ( cpu_has_xmm ) - { - /* load default value into MXCSR control/status register */ - val = MXCSR_DEFAULT; - asm volatile ( "ldmxcsr %0" : : "m" (val) ); - } -} - /*******************************/ /* FPU Restore Functions */ /*******************************/ @@ -62,7 +49,7 @@ * sometimes new user value. Both should be ok. Use the FPU saved * data block as a safe address because it should be in L1. */ - if ( !(fpu_ctxt->fsw & 0x0080) && + if ( !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) && boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) { asm volatile ( "fnclex\n\t" @@ -165,9 +152,9 @@ static inline void fpu_fxsave(struct vcpu *v) { typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt; - int word_size = cpu_has_fpu_sel ? 8 : 0; + unsigned int fip_width = v->domain->arch.x87_fip_width; - if ( !is_pv_32bit_vcpu(v) ) + if ( fip_width != 4 ) { /* * The only way to force fxsaveq on a wide range of gas versions. @@ -185,7 +172,11 @@ boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) return; - if ( word_size > 0 && + /* + * If the FIP/FDP[63:32] are both zero, it is safe to use the + * 32-bit restore to also restore the selectors. 
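The comment just above (completed below) motivates the new fip_width logic in fpu_fxsave(): the 4-byte FIP/FDP image also preserves the FCS/FDS selectors, but is only safe when both 64-bit pointers fit in 32 bits. The decision can be modelled as a pure function; a sketch in which configured_width stands in for the per-domain x87_fip_width field:

```c
#include <stdint.h>
#include <stdio.h>

/* configured_width: 4 forces the short format, 8 the long one,
 * 0 means decide per-save from the pointer values. */
static unsigned int choose_fip_width(uint64_t fip, uint64_t fdp,
                                     unsigned int configured_width)
{
    if ( configured_width == 4 )
        return 4;

    /* High halves both zero: the 4-byte layout loses nothing and keeps
     * the FCS/FDS selectors intact. */
    if ( configured_width == 0 && !((fip | fdp) >> 32) )
        return 4;

    return 8;
}

int main(void)
{
    printf("%u\n", choose_fip_width(0x7fff1234, 0x5678, 0)); /* 4 */
    printf("%u\n", choose_fip_width(1ULL << 40, 0, 0));      /* 8 */
    return 0;
}
```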
+ */ + if ( !fip_width && !((fpu_ctxt->fip.addr | fpu_ctxt->fdp.addr) >> 32) ) { struct ix87_env fpu_env; @@ -193,17 +184,18 @@ asm volatile ( "fnstenv %0" : "=m" (fpu_env) ); fpu_ctxt->fip.sel = fpu_env.fcs; fpu_ctxt->fdp.sel = fpu_env.fds; - word_size = 4; + fip_width = 4; } + else + fip_width = 8; } else { asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) ); - word_size = 4; + fip_width = 4; } - if ( word_size >= 0 ) - fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = word_size; + fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = fip_width; } /* Save x87 FPU state */ @@ -248,15 +240,8 @@ if ( cpu_has_xsave ) fpu_xrstor(v, XSTATE_LAZY); - else if ( v->fpu_initialised ) - { - if ( cpu_has_fxsr ) - fpu_fxrstor(v); - else - fpu_frstor(v); - } else - fpu_init(); + fpu_fxrstor(v); v->fpu_initialised = 1; v->fpu_dirtied = 1; @@ -313,7 +298,14 @@ else { v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16); - if ( !v->arch.fpu_ctxt ) + if ( v->arch.fpu_ctxt ) + { + typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt; + + fpu_sse->fcw = FCW_DEFAULT; + fpu_sse->mxcsr = MXCSR_DEFAULT; + } + else rc = -ENOMEM; } diff -Nru xen-4.6.0/xen/arch/x86/mm/guest_walk.c xen-4.6.5/xen/arch/x86/mm/guest_walk.c --- xen-4.6.0/xen/arch/x86/mm/guest_walk.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/guest_walk.c 2017-03-07 16:19:05.000000000 +0000 @@ -93,6 +93,12 @@ struct page_info *page; void *map; + if ( gfn_x(gfn) >> p2m->domain->arch.paging.gfn_bits ) + { + *rc = _PAGE_INVALID_BIT; + return NULL; + } + /* Translate the gfn, unsharing if shared */ page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), p2mt, NULL, q); @@ -212,6 +218,11 @@ rc |= _PAGE_PRESENT; goto out; } + if ( gflags & _PAGE_PSE ) + { + rc |= _PAGE_PSE | _PAGE_INVALID_BIT; + goto out; + } rc |= ((gflags & mflags) ^ mflags); /* Map the l3 table */ @@ -232,7 +243,7 @@ } rc |= ((gflags & mflags) ^ mflags); - pse1G = (gflags & _PAGE_PSE) && guest_supports_1G_superpages(v); + pse1G = !!(gflags & _PAGE_PSE); if ( pse1G ) { @@ -252,6 +263,8 @@ /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ flags &= ~_PAGE_PAT; + if ( !guest_supports_1G_superpages(v) ) + rc |= _PAGE_PSE | _PAGE_INVALID_BIT; if ( gfn_x(start) & GUEST_L3_GFN_MASK & ~0x1 ) rc |= _PAGE_INVALID_BITS; @@ -326,20 +339,8 @@ flags &= ~_PAGE_PAT; if ( gfn_x(start) & GUEST_L2_GFN_MASK & ~0x1 ) - { -#if GUEST_PAGING_LEVELS == 2 - /* - * Note that _PAGE_INVALID_BITS is zero in this case, yielding a - * no-op here. - * - * Architecturally, the walk should fail if bit 21 is set (others - * aren't being checked at least in PSE36 mode), but we'll ignore - * this here in order to avoid specifying a non-natural, non-zero - * _PAGE_INVALID_BITS value just for that case. - */ -#endif rc |= _PAGE_INVALID_BITS; - } + /* Increment the pfn by the right number of 4k pages. * Mask out PAT and invalid bits. */ start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) + @@ -422,5 +423,11 @@ put_page(mfn_to_page(mfn_x(gw->l1mfn))); } + /* If this guest has a restricted physical address space then the + * target GFN must fit within it. 
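Both walker changes above enforce the same invariant: a guest frame number is only usable if it fits below the domain's physical address width, and anything wider must fail the walk. Reduced to its essence (hypothetical 40-bit limit):

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* A gfn is representable iff it has no bits at or above gfn_bits,
 * where gfn_bits = physical address width - PAGE_SHIFT. */
static int gfn_in_range(uint64_t gfn, unsigned int paddr_bits)
{
    return !(gfn >> (paddr_bits - PAGE_SHIFT));
}

int main(void)
{
    /* 40-bit physical addresses -> 28 usable gfn bits. */
    printf("%d\n", gfn_in_range(0x0fffffffULL, 40)); /* 1: fits */
    printf("%d\n", gfn_in_range(1ULL << 28, 40));    /* 0: walk must fail */
    return 0;
}
```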
*/ + if ( !(rc & _PAGE_PRESENT) + && gfn_x(guest_l1e_get_gfn(gw->l1e)) >> d->arch.paging.gfn_bits ) + rc |= _PAGE_INVALID_BITS; + return rc; } diff -Nru xen-4.6.0/xen/arch/x86/mm/hap/hap.c xen-4.6.5/xen/arch/x86/mm/hap/hap.c --- xen-4.6.0/xen/arch/x86/mm/hap/hap.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/hap/hap.c 2017-03-07 16:19:05.000000000 +0000 @@ -448,6 +448,8 @@ { INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); + d->arch.paging.gfn_bits = hap_paddr_bits - PAGE_SHIFT; + /* Use HAP logdirty mechanism. */ paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty, @@ -688,7 +690,8 @@ * Must perform the flush right now or an other vcpu may * use it when we use the next VMRUN emulation, otherwise. */ - p2m_flush(v, vcpu_nestedhvm(v).nv_p2m); + if ( vcpu_nestedhvm(v).nv_p2m ) + p2m_flush(v, vcpu_nestedhvm(v).nv_p2m); return 1; } diff -Nru xen-4.6.0/xen/arch/x86/mm/hap/nested_hap.c xen-4.6.5/xen/arch/x86/mm/hap/nested_hap.c --- xen-4.6.0/xen/arch/x86/mm/hap/nested_hap.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/hap/nested_hap.c 2017-03-07 16:19:05.000000000 +0000 @@ -141,7 +141,7 @@ * walk is successful, the translated value is returned in * L1_gpa. The result value tells what to do next. */ -static int +int nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order, uint8_t *p2m_acc, bool_t access_r, bool_t access_w, bool_t access_x) @@ -263,7 +263,7 @@ switch ( p2ma_10 ) { - case p2m_access_rwx ... p2m_access_n: + case p2m_access_n ... p2m_access_rwx: break; case p2m_access_rx2rw: p2ma_10 = p2m_access_rx; diff -Nru xen-4.6.0/xen/arch/x86/mm/p2m.c xen-4.6.5/xen/arch/x86/mm/p2m.c --- xen-4.6.0/xen/arch/x86/mm/p2m.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/p2m.c 2017-03-07 16:19:05.000000000 +0000 @@ -60,6 +60,7 @@ /* Init the datastructures for later use by the p2m code */ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) { + unsigned int i; int ret = 0; mm_rwlock_init(&p2m->lock); @@ -75,6 +76,9 @@ p2m->np2m_base = P2M_BASE_EADDR; + for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) + p2m->pod.mrp.list[i] = INVALID_GFN; + if ( hap_enabled(d) && cpu_has_vmx ) ret = ept_p2m_init(p2m); else @@ -587,9 +591,11 @@ void p2m_final_teardown(struct domain *d) { - /* We must teardown unconditionally because + /* + * We must teardown both of them unconditionally because * we initialise them unconditionally. 
*/ + p2m_teardown_altp2m(d); p2m_teardown_nestedp2m(d); /* Iterate over all p2m tables per domain */ @@ -1910,7 +1916,8 @@ ASSERT(page_list_empty(&p2m->pod.super)); ASSERT(page_list_empty(&p2m->pod.single)); - if ( p2m->np2m_base == P2M_BASE_EADDR ) + /* No need to flush if it's already empty */ + if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR ) { p2m_unlock(p2m); return; @@ -2027,20 +2034,39 @@ if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) ) { - unsigned long gfn; + unsigned long l2_gfn, l1_gfn; struct p2m_domain *p2m; const struct paging_mode *mode; - uint32_t pfec_21 = *pfec; uint64_t np2m_base = nhvm_vcpu_p2m_base(v); + uint8_t l1_p2ma; + unsigned int l1_page_order; + int rv; /* translate l2 guest va into l2 guest gfn */ p2m = p2m_get_nestedp2m(v, np2m_base); mode = paging_get_nestedmode(v); - gfn = mode->gva_to_gfn(v, p2m, va, pfec); + l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec); + + if ( l2_gfn == INVALID_GFN ) + return INVALID_GFN; /* translate l2 guest gfn into l1 guest gfn */ - return hostmode->p2m_ga_to_gfn(v, hostp2m, np2m_base, - gfn << PAGE_SHIFT, &pfec_21, NULL); + rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma, + 1, + !!(*pfec & PFEC_write_access), + !!(*pfec & PFEC_insn_fetch)); + + if ( rv != NESTEDHVM_PAGEFAULT_DONE ) + return INVALID_GFN; + + /* + * Sanity check that l1_gfn can be used properly as a 4K mapping, even + * if it mapped by a nested superpage. + */ + ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) == + (l1_gfn & ((1ul << l1_page_order) - 1))); + + return l1_gfn; } return hostmode->gva_to_gfn(v, hostp2m, va, pfec); diff -Nru xen-4.6.0/xen/arch/x86/mm/p2m-ept.c xen-4.6.5/xen/arch/x86/mm/p2m-ept.c --- xen-4.6.0/xen/arch/x86/mm/p2m-ept.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/p2m-ept.c 2017-03-07 16:19:05.000000000 +0000 @@ -642,7 +642,6 @@ spurious = curr->arch.hvm_vmx.ept_spurious_misconfig; rc = resolve_misconfig(p2m, PFN_DOWN(gpa)); curr->arch.hvm_vmx.ept_spurious_misconfig = 0; - ept_sync_domain(p2m); p2m_unlock(p2m); @@ -669,7 +668,7 @@ int need_modify_vtd_table = 1; int vtd_pte_present = 0; unsigned int iommu_flags = p2m_get_iommu_flags(p2mt); - enum { sync_off, sync_on, sync_check } needs_sync = sync_check; + bool_t needs_sync = 1; ept_entry_t old_entry = { .epte = 0 }; ept_entry_t new_entry = { .epte = 0 }; struct ept_data *ept = &p2m->ept; @@ -690,12 +689,7 @@ /* Carry out any eventually pending earlier changes first. */ ret = resolve_misconfig(p2m, gfn); if ( ret < 0 ) - { - ept_sync_domain(p2m); return ret; - } - if ( ret > 0 ) - needs_sync = sync_on; ASSERT((target == 2 && hvm_hap_has_1gb()) || (target == 1 && hvm_hap_has_2mb()) || @@ -738,8 +732,8 @@ /* We reached the target level. 
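In the reworked nested gva_to_gfn path above, the L1 walk may succeed at superpage granularity, and the ASSERT checks that the low page_order bits of the L2 and L1 frame numbers agree, which is exactly the condition under which the result is usable as a 4K mapping. A sketch of the splice that invariant licenses (hypothetical helper, not code from the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* Combine an L1 frame obtained at 'order' granularity with the low
 * bits of the L2 gfn; valid because both sides agree in those bits. */
static uint64_t splice_gfn(uint64_t l2_gfn, uint64_t l1_gfn,
                           unsigned int order)
{
    uint64_t mask = (1ULL << order) - 1;

    return (l1_gfn & ~mask) | (l2_gfn & mask);
}

int main(void)
{
    /* order 9 = a 2MiB superpage's worth of 4K frames */
    printf("%#llx\n", (unsigned long long)splice_gfn(0x12345, 0xabc00, 9));
    return 0;
}
```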
*/ /* No need to flush if the old entry wasn't valid */ - if ( needs_sync == sync_check && !is_epte_present(ept_entry) ) - needs_sync = sync_off; + if ( !is_epte_present(ept_entry) ) + needs_sync = 0; /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB), * the intermediate tables will be freed below after the ept flush @@ -778,8 +772,7 @@ ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER)); } - if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || - (p2mt == p2m_ram_paging_in) ) + if ( mfn_valid(mfn_x(mfn)) || p2m_allows_invalid_mfn(p2mt) ) { int emt = epte_get_entry_emt(p2m->domain, gfn, mfn, i * EPT_TABLE_ORDER, &ipat, direct_mmio); @@ -821,7 +814,7 @@ p2m->max_mapped_pfn = gfn + (1UL << order) - 1; out: - if ( needs_sync != sync_off ) + if ( needs_sync ) ept_sync_domain(p2m); /* For host p2m, may need to change VT-d page table.*/ diff -Nru xen-4.6.0/xen/arch/x86/mm/p2m-pod.c xen-4.6.5/xen/arch/x86/mm/p2m-pod.c --- xen-4.6.0/xen/arch/x86/mm/p2m-pod.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/p2m-pod.c 2017-03-07 16:19:05.000000000 +0000 @@ -375,11 +375,11 @@ return ret; } -void -p2m_pod_empty_cache(struct domain *d) +int p2m_pod_empty_cache(struct domain *d) { struct p2m_domain *p2m = p2m_get_hostp2m(d); struct page_info *page; + unsigned int i; /* After this barrier no new PoD activities can happen. */ BUG_ON(!d->is_dying); @@ -389,8 +389,6 @@ while ( (page = page_list_remove_head(&p2m->pod.super)) ) { - int i; - for ( i = 0 ; i < SUPERPAGE_PAGES ; i++ ) { BUG_ON(page_get_owner(page + i) != d); @@ -398,19 +396,27 @@ } p2m->pod.count -= SUPERPAGE_PAGES; + + if ( hypercall_preempt_check() ) + goto out; } - while ( (page = page_list_remove_head(&p2m->pod.single)) ) + for ( i = 0; (page = page_list_remove_head(&p2m->pod.single)); ++i ) { BUG_ON(page_get_owner(page) != d); page_list_add_tail(page, &d->page_list); p2m->pod.count -= 1; + + if ( i && !(i & 511) && hypercall_preempt_check() ) + goto out; } BUG_ON(p2m->pod.count != 0); + out: unlock_page_alloc(p2m); + return p2m->pod.count ? -ERESTART : 0; } int @@ -723,7 +729,7 @@ } /* Try to remove the page, restoring old mapping if it fails. */ - p2m_set_entry(p2m, gfn, _mfn(0), PAGE_ORDER_2M, + p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M, p2m_populate_on_demand, p2m->default_access); /* Make none of the MFNs are used elsewhere... for example, mapped @@ -839,7 +845,7 @@ } /* Try to remove the page, restoring old mapping if it fails. */ - p2m_set_entry(p2m, gfns[i], _mfn(0), PAGE_ORDER_4K, + p2m_set_entry(p2m, gfns[i], _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_populate_on_demand, p2m->default_access); /* See if the page was successfully unmapped. (Allow one refcount @@ -901,28 +907,6 @@ } #define POD_SWEEP_LIMIT 1024 - -/* When populating a new superpage, look at recently populated superpages - * hoping that they've been zeroed. This will snap up zeroed pages as soon as - * the guest OS is done with them. 
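The preemption points added to p2m_pod_empty_cache() above follow a common Xen pattern: poll for pending work only every 2^n iterations so the check itself stays off the hot path, and unwind with -ERESTART so the hypercall is reissued. Schematically, with preempt_check() standing in for hypercall_preempt_check():

```c
#include <stdio.h>

/* Stand-in for Xen's hypercall_preempt_check(). */
static int preempt_check(void) { return 0; }

/* Drain a long list, checking for preemption only every 512 items;
 * a negative return models the -ERESTART continuation. */
static int drain(unsigned int items)
{
    unsigned int i;

    for ( i = 0; i < items; ++i )
    {
        /* ... release one page here ... */

        if ( i && !(i & 511) && preempt_check() )
            return -1; /* caller re-enters the hypercall later */
    }

    return 0;
}

int main(void)
{
    printf("%d\n", drain(100000));
    return 0;
}
```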
*/ -static void -p2m_pod_check_last_super(struct p2m_domain *p2m, unsigned long gfn_aligned) -{ - unsigned long check_gfn; - - ASSERT(p2m->pod.last_populated_index < POD_HISTORY_MAX); - - check_gfn = p2m->pod.last_populated[p2m->pod.last_populated_index]; - - p2m->pod.last_populated[p2m->pod.last_populated_index] = gfn_aligned; - - p2m->pod.last_populated_index = - ( p2m->pod.last_populated_index + 1 ) % POD_HISTORY_MAX; - - p2m_pod_zero_check_superpage(p2m, check_gfn); -} - - #define POD_SWEEP_STRIDE 16 static void p2m_pod_emergency_sweep(struct p2m_domain *p2m) @@ -963,7 +947,7 @@ * NB that this is a zero-sum game; we're increasing our cache size * by re-increasing our 'debt'. Since we hold the pod lock, * (entry_count - count) must remain the same. */ - if ( p2m->pod.count > 0 && i < limit ) + if ( i < limit && (p2m->pod.count > 0 || hypercall_preempt_check()) ) break; } @@ -975,6 +959,57 @@ } +static void pod_eager_reclaim(struct p2m_domain *p2m) +{ + struct pod_mrp_list *mrp = &p2m->pod.mrp; + unsigned int i = 0; + + /* + * Always check one page for reclaimation. + * + * If the PoD pool is empty, keep checking some space is found, or all + * entries have been exhaused. + */ + do + { + unsigned int idx = (mrp->idx + i++) % ARRAY_SIZE(mrp->list); + unsigned long gfn = mrp->list[idx]; + + if ( gfn != INVALID_GFN ) + { + if ( gfn & POD_LAST_SUPERPAGE ) + { + gfn &= ~POD_LAST_SUPERPAGE; + + if ( p2m_pod_zero_check_superpage(p2m, gfn) == 0 ) + { + unsigned int x; + + for ( x = 0; x < SUPERPAGE_PAGES; ++x, ++gfn ) + p2m_pod_zero_check(p2m, &gfn, 1); + } + } + else + p2m_pod_zero_check(p2m, &gfn, 1); + + mrp->list[idx] = INVALID_GFN; + } + + } while ( (p2m->pod.count == 0) && (i < ARRAY_SIZE(mrp->list)) ); +} + +static void pod_eager_record(struct p2m_domain *p2m, + unsigned long gfn, unsigned int order) +{ + struct pod_mrp_list *mrp = &p2m->pod.mrp; + + ASSERT(gfn != INVALID_GFN); + + mrp->list[mrp->idx++] = + gfn | (order == PAGE_ORDER_2M ? POD_LAST_SUPERPAGE : 0); + mrp->idx %= ARRAY_SIZE(mrp->list); +} + int p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, unsigned int order, @@ -1010,11 +1045,15 @@ * NOTE: In a fine-grained p2m locking scenario this operation * may need to promote its locking from gfn->1g superpage */ - p2m_set_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M, + p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_2M, p2m_populate_on_demand, p2m->default_access); return 0; } + /* Only reclaim if we're in actual need of more cache. */ + if ( p2m->pod.entry_count > p2m->pod.count ) + pod_eager_reclaim(p2m); + /* Only sweep if we're actually out of memory. Doing anything else * causes unnecessary time and fragmentation of superpages in the p2m. 
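The pod_eager_record()/pod_eager_reclaim() pair above replaces the old single-slot "last populated" heuristic with a small ring of recently populated GFNs, tagging 2MiB populations in a spare address bit so reclaim can sweep the whole superpage. The recording side is simple enough to exercise standalone (ring size assumed; tag bit as in the patch; 64-bit longs assumed):

```c
#include <stdio.h>

#define NR_POD_MRP_ENTRIES 32          /* assumed ring size */
#define INVALID_GFN        (~0UL)
#define POD_LAST_SUPERPAGE (1UL << 63) /* tag bit; assumes 64-bit longs */
#define PAGE_ORDER_2M      9

struct pod_mrp_list {
    unsigned long list[NR_POD_MRP_ENTRIES];
    unsigned int idx;
};

/* Record a just-populated gfn; 2MiB populations carry the tag so the
 * reclaim side can zero-check the entire superpage. */
static void eager_record(struct pod_mrp_list *mrp, unsigned long gfn,
                         unsigned int order)
{
    mrp->list[mrp->idx++] =
        gfn | (order == PAGE_ORDER_2M ? POD_LAST_SUPERPAGE : 0);
    mrp->idx %= NR_POD_MRP_ENTRIES;
}

int main(void)
{
    struct pod_mrp_list mrp = { .idx = 0 };
    unsigned int i;

    for ( i = 0; i < NR_POD_MRP_ENTRIES; i++ )
        mrp.list[i] = INVALID_GFN;

    eager_record(&mrp, 0x1200, PAGE_ORDER_2M);
    printf("slot 0 = %#lx\n", mrp.list[0]);
    return 0;
}
```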
*/ if ( p2m->pod.count == 0 ) @@ -1051,6 +1090,8 @@ p2m->pod.entry_count -= (1 << order); BUG_ON(p2m->pod.entry_count < 0); + pod_eager_record(p2m, gfn_aligned, order); + if ( tb_init_done ) { struct { @@ -1066,12 +1107,6 @@ __trace_var(TRC_MEM_POD_POPULATE, 0, sizeof(t), &t); } - /* Check the last guest demand-populate */ - if ( p2m->pod.entry_count > p2m->pod.count - && (order == PAGE_ORDER_2M) - && (q & P2M_ALLOC) ) - p2m_pod_check_last_super(p2m, gfn_aligned); - pod_unlock(p2m); return 0; out_of_memory: @@ -1094,7 +1129,7 @@ * need promoting the gfn lock from gfn->2M superpage */ gfn_aligned = (gfn>>order)<<order; for(i=0; i<(1<<order); i++) - p2m_set_entry(p2m, gfn_aligned + i, _mfn(0), PAGE_ORDER_4K, + p2m_set_entry(p2m, gfn_aligned + i, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_populate_on_demand, p2m->default_access); if ( tb_init_done ) { @@ -1149,8 +1184,8 @@ } /* Now, actually do the two-way mapping */ - rc = p2m_set_entry(p2m, gfn, _mfn(0), order, p2m_populate_on_demand, - p2m->default_access); + rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), order, + p2m_populate_on_demand, p2m->default_access); if ( rc == 0 ) { pod_lock(p2m); diff -Nru xen-4.6.0/xen/arch/x86/mm/p2m-pt.c xen-4.6.5/xen/arch/x86/mm/p2m-pt.c --- xen-4.6.0/xen/arch/x86/mm/p2m-pt.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/p2m-pt.c 2017-03-07 16:19:05.000000000 +0000 @@ -107,6 +107,8 @@ case p2m_mmio_direct: if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) flags |= _PAGE_RW; + else + flags |= _PAGE_PWT; return flags | P2M_BASE_FLAGS | _PAGE_PCD; } } @@ -571,7 +573,7 @@ } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); - l3e_content = mfn_valid(mfn) + l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) : l3e_empty(); @@ -607,8 +609,7 @@ p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry))); old_mfn = l1e_get_pfn(*p2m_entry); - if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) - || p2m_is_paging(p2mt) ) + if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn)); else @@ -644,7 +645,7 @@ } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); - if ( mfn_valid(mfn) || p2m_is_pod(p2mt) ) + if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE); diff -Nru xen-4.6.0/xen/arch/x86/mm/paging.c xen-4.6.5/xen/arch/x86/mm/paging.c --- xen-4.6.0/xen/arch/x86/mm/paging.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/paging.c 2017-03-07 16:19:05.000000000 +0000 @@ -815,7 +815,7 @@ return rc; /* Move populate-on-demand cache back to domain_list for destruction */ - p2m_pod_empty_cache(d); + rc = p2m_pod_empty_cache(d); return rc; } diff -Nru xen-4.6.0/xen/arch/x86/mm/shadow/common.c xen-4.6.5/xen/arch/x86/mm/shadow/common.c --- xen-4.6.0/xen/arch/x86/mm/shadow/common.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/shadow/common.c 2017-03-07 16:19:05.000000000 +0000 @@ -51,6 +51,16 @@ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist); INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); + d->arch.paging.gfn_bits = paddr_bits - PAGE_SHIFT; +#ifndef CONFIG_BIGMEM + /* + * Shadowed superpages store GFNs in 32-bit page_info fields. + * Note that we cannot use guest_supports_superpages() here.
+ */ + if ( !is_pv_domain(d) || opt_allow_superpage ) + d->arch.paging.gfn_bits = 32; +#endif + /* Use shadow pagetables for log-dirty support */ paging_log_dirty_init(d, sh_enable_log_dirty, sh_disable_log_dirty, sh_clean_dirty_bitmap); @@ -115,12 +125,22 @@ /* x86 emulator support for the shadow code */ +/* + * Callers which pass a known in-range x86_segment can rely on the return + * pointer being valid. Other callers must explicitly check for errors. + */ struct segment_register *hvm_get_seg_reg( enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt) { - struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg]; - if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) ) - hvm_get_segment_register(current, seg, seg_reg); + unsigned int idx = seg; + struct segment_register *seg_reg; + + if ( idx >= ARRAY_SIZE(sh_ctxt->seg_reg) ) + return ERR_PTR(-X86EMUL_UNHANDLEABLE); + + seg_reg = &sh_ctxt->seg_reg[idx]; + if ( !__test_and_set_bit(idx, &sh_ctxt->valid_seg_regs) ) + hvm_get_segment_register(current, idx, seg_reg); return seg_reg; } @@ -132,9 +152,13 @@ struct sh_emulate_ctxt *sh_ctxt, unsigned long *paddr) { - struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt); + struct segment_register *reg; int okay; + reg = hvm_get_seg_reg(seg, sh_ctxt); + if ( IS_ERR(reg) ) + return -PTR_ERR(reg); + okay = hvm_virtual_to_linear_addr( seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr); @@ -160,7 +184,7 @@ rc = hvm_translate_linear_addr( seg, offset, bytes, access_type, sh_ctxt, &addr); - if ( rc ) + if ( rc || !bytes ) return rc; if ( access_type == hvm_access_insn_fetch ) @@ -235,16 +259,13 @@ unsigned long addr; int rc; - if ( !is_x86_user_segment(seg) ) - return X86EMUL_UNHANDLEABLE; - /* How many emulations could we save if we unshadowed on stack writes? */ if ( seg == x86_seg_ss ) perfc_incr(shadow_fault_emulate_stack); rc = hvm_translate_linear_addr( seg, offset, bytes, hvm_access_write, sh_ctxt, &addr); - if ( rc ) + if ( rc || !bytes ) return rc; return v->arch.paging.mode->shadow.x86_emulate_write( @@ -265,9 +286,6 @@ unsigned long addr, old[2], new[2]; int rc; - if ( !is_x86_user_segment(seg) ) - return X86EMUL_UNHANDLEABLE; - rc = hvm_translate_linear_addr( seg, offset, bytes, hvm_access_write, sh_ctxt, &addr); if ( rc ) @@ -1590,7 +1608,7 @@ if ( !d->arch.paging.p2m_alloc_failed ) { d->arch.paging.p2m_alloc_failed = 1; - dprintk(XENLOG_ERR, "d%i failed to allocate from shadow pool", + dprintk(XENLOG_ERR, "d%i failed to allocate from shadow pool\n", d->domain_id); } paging_unlock(d); @@ -2429,7 +2447,7 @@ /* Remove all mappings of a guest frame from the shadow tables. * Returns non-zero if we need to flush TLBs. */ -static int sh_remove_all_mappings(struct domain *d, mfn_t gmfn) +static int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn) { struct page_info *page = mfn_to_page(gmfn); @@ -2481,19 +2499,24 @@ /* If that didn't catch the mapping, something is very wrong */ if ( !sh_check_page_has_no_refs(page) ) { - /* Don't complain if we're in HVM and there are some extra mappings: + /* + * Don't complain if we're in HVM and there are some extra mappings: * The qemu helper process has an untyped mapping of this dom's RAM * and the HVM restore program takes another. - * Also allow one typed refcount for xenheap pages, to match - * share_xen_page_with_guest(). */ + * Also allow one typed refcount for + * - Xen heap pages, to match share_xen_page_with_guest(), + * - ioreq server pages, to match prepare_ring_for_helper(). 
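The bounds-checked hvm_get_seg_reg() above returns ERR_PTR(-X86EMUL_UNHANDLEABLE) instead of indexing past seg_reg[]; callers test with IS_ERR() and recover the code via -PTR_ERR(). A minimal model of that encoding (the kernel-style convention Xen borrows; the MAX_ERRNO value here is assumed):

```c
#include <stdio.h>

#define MAX_ERRNO 4095 /* assumed, as in the usual kernel convention */

/* Encode a small negative errno in the top of the address space, where
 * no valid object can live, so one pointer carries either result. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
    void *seg = ERR_PTR(-22); /* e.g. an out-of-range segment index */

    if ( IS_ERR(seg) )
        printf("lookup failed: errno %ld\n", -PTR_ERR(seg));
    return 0;
}
```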
+ */ if ( !(shadow_mode_external(d) && (page->count_info & PGC_count_mask) <= 3 && ((page->u.inuse.type_info & PGT_count_mask) - == !!is_xen_heap_page(page))) ) + == (is_xen_heap_page(page) || + is_ioreq_server_page(d, page)))) ) { - SHADOW_ERROR("can't find all mappings of mfn %lx: " - "c=%08lx t=%08lx\n", mfn_x(gmfn), - page->count_info, page->u.inuse.type_info); + SHADOW_ERROR("can't find all mappings of mfn %lx (gfn %lx): " + "c=%lx t=%lx x=%d i=%d\n", mfn_x(gmfn), gfn_x(gfn), + page->count_info, page->u.inuse.type_info, + !!is_xen_heap_page(page), is_ioreq_server_page(d, page)); } } @@ -3361,7 +3384,7 @@ if ( (p2m_is_valid(p2mt) || p2m_is_grant(p2mt)) && mfn_valid(mfn) ) { sh_remove_all_shadows_and_parents(d, mfn); - if ( sh_remove_all_mappings(d, mfn) ) + if ( sh_remove_all_mappings(d, mfn, _gfn(gfn)) ) flush_tlb_mask(d->domain_dirty_cpumask); } } @@ -3396,7 +3419,8 @@ { /* This GFN->MFN mapping has gone away */ sh_remove_all_shadows_and_parents(d, omfn); - if ( sh_remove_all_mappings(d, omfn) ) + if ( sh_remove_all_mappings(d, omfn, + _gfn(gfn + (i << PAGE_SHIFT))) ) cpumask_or(&flushmask, &flushmask, d->domain_dirty_cpumask); } @@ -3612,7 +3636,8 @@ dirty = 1; /* TODO: Heuristics for finding the single mapping of * this gmfn */ - flush_tlb |= sh_remove_all_mappings(d, mfn); + flush_tlb |= sh_remove_all_mappings(d, mfn, + _gfn(begin_pfn + i)); } else { diff -Nru xen-4.6.0/xen/arch/x86/mm/shadow/multi.c xen-4.6.5/xen/arch/x86/mm/shadow/multi.c --- xen-4.6.0/xen/arch/x86/mm/shadow/multi.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/shadow/multi.c 2017-03-07 16:19:05.000000000 +0000 @@ -519,12 +519,14 @@ gfn_t target_gfn = guest_l1e_get_gfn(guest_entry); u32 pass_thru_flags; u32 gflags, sflags; + bool_t mmio_mfn; /* We don't shadow PAE l3s */ ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); /* Check there's something for the shadows to map to */ - if ( !p2m_is_valid(p2mt) && !p2m_is_grant(p2mt) ) + if ( (!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) + || gfn_x(target_gfn) >> d->arch.paging.gfn_bits ) { *sp = shadow_l1e_empty(); goto done; @@ -559,7 +561,10 @@ // mfn means that we can not usefully shadow anything, and so we // return early. 
// - if ( !mfn_valid(target_mfn) + mmio_mfn = !mfn_valid(target_mfn) + || (level == 1 + && page_get_owner(mfn_to_page(target_mfn)) == dom_io); + if ( mmio_mfn && !(level == 1 && (!shadow_mode_refcounts(d) || p2mt == p2m_mmio_direct)) ) { @@ -577,7 +582,7 @@ _PAGE_RW | _PAGE_PRESENT); if ( guest_supports_nx(v) ) pass_thru_flags |= _PAGE_NX_BIT; - if ( !shadow_mode_refcounts(d) && !mfn_valid(target_mfn) ) + if ( level == 1 && !shadow_mode_refcounts(d) && mmio_mfn ) pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT; sflags = gflags & pass_thru_flags; @@ -676,10 +681,14 @@ } /* Read-only memory */ - if ( p2m_is_readonly(p2mt) || - (p2mt == p2m_mmio_direct && - rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn))) ) + if ( p2m_is_readonly(p2mt) ) sflags &= ~_PAGE_RW; + else if ( p2mt == p2m_mmio_direct && + rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn)) ) + { + sflags &= ~(_PAGE_RW | _PAGE_PAT); + sflags |= _PAGE_PCD | _PAGE_PWT; + } // protect guest page tables // @@ -1185,22 +1194,28 @@ && !sh_l1e_is_magic(new_sl1e) ) { /* About to install a new reference */ - if ( shadow_mode_refcounts(d) ) { + if ( shadow_mode_refcounts(d) ) + { +#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT) + int rc; + TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF); - switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) ) + switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) ) { default: /* Doesn't look like a pagetable. */ flags |= SHADOW_SET_ERROR; new_sl1e = shadow_l1e_empty(); break; - case 1: - shadow_l1e_remove_flags(new_sl1e, _PAGE_RW); + case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE: + ASSERT(!(rc & ~PAGE_FLIPPABLE)); + new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc); /* fall through */ case 0: shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); break; } +#undef PAGE_FLIPPABLE } } diff -Nru xen-4.6.0/xen/arch/x86/mm/shadow/types.h xen-4.6.5/xen/arch/x86/mm/shadow/types.h --- xen-4.6.0/xen/arch/x86/mm/shadow/types.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm/shadow/types.h 2017-03-07 16:19:05.000000000 +0000 @@ -99,6 +99,9 @@ static inline shadow_l1e_t shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags) { l1e_remove_flags(sl1e, flags); return sl1e; } +static inline shadow_l1e_t +shadow_l1e_flip_flags(shadow_l1e_t sl1e, u32 flags) +{ l1e_flip_flags(sl1e, flags); return sl1e; } static inline shadow_l1e_t shadow_l1e_empty(void) { return l1e_empty(); } diff -Nru xen-4.6.0/xen/arch/x86/mm.c xen-4.6.5/xen/arch/x86/mm.c --- xen-4.6.0/xen/arch/x86/mm.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mm.c 2017-03-07 16:19:05.000000000 +0000 @@ -160,7 +160,10 @@ static uint32_t base_disallow_mask; /* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ #define L1_DISALLOW_MASK ((base_disallow_mask | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) -#define L2_DISALLOW_MASK (base_disallow_mask & ~_PAGE_PSE) + +#define L2_DISALLOW_MASK (unlikely(opt_allow_superpage) \ + ? base_disallow_mask & ~_PAGE_PSE \ + : base_disallow_mask) #define l3_disallow_mask(d) (!is_pv_32bit_domain(d) ? \ base_disallow_mask : 0xFFFFF198U) @@ -175,6 +178,18 @@ is_pv_domain(d)) ? 
\ L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS)) +static s8 __read_mostly opt_mmio_relax; +static void __init parse_mmio_relax(const char *s) +{ + if ( !*s ) + opt_mmio_relax = 1; + else + opt_mmio_relax = parse_bool(s); + if ( opt_mmio_relax < 0 && strcmp(s, "all") ) + opt_mmio_relax = 0; +} +custom_param("mmio-relax", parse_mmio_relax); + static void __init init_frametable_chunk(void *start, void *end) { unsigned long s = (unsigned long)start; @@ -502,12 +517,12 @@ make_cr3(v, cr3_mfn); } +static const char __section(".bss.page_aligned") zero_page[PAGE_SIZE]; static void invalidate_shadow_ldt(struct vcpu *v, int flush) { l1_pgentry_t *pl1e; - int i; - unsigned long pfn; + unsigned int i; struct page_info *page; BUG_ON(unlikely(in_irq())); @@ -522,10 +537,10 @@ for ( i = 16; i < 32; i++ ) { - pfn = l1e_get_pfn(pl1e[i]); - if ( pfn == 0 ) continue; + if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) ) + continue; + page = l1e_get_page(pl1e[i]); l1e_write(&pl1e[i], l1e_empty()); - page = mfn_to_page(pfn); ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page); ASSERT_PAGE_IS_DOMAIN(page, v->domain); put_page_and_type(page); @@ -796,10 +811,7 @@ if ( !mfn_valid(mfn) || (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) { -#ifndef NDEBUG - const unsigned long *ro_map; - unsigned int seg, bdf; -#endif + int flip = 0; /* Only needed the reference to confirm dom_io ownership. */ if ( mfn_valid(mfn) ) @@ -833,24 +845,57 @@ return -EINVAL; } - if ( !(l1f & _PAGE_RW) || - !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) - return 0; + if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) + { + /* MMIO pages must not be mapped cachable unless requested so. */ + switch ( opt_mmio_relax ) + { + case 0: + break; + case 1: + if ( !is_hardware_domain(l1e_owner) ) + break; + /* fallthrough */ + case -1: + return 0; + default: + ASSERT_UNREACHABLE(); + } + } + else if ( l1f & _PAGE_RW ) + { #ifndef NDEBUG - if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || - ((ro_map = pci_get_ro_map(seg)) != NULL && - test_bit(bdf, ro_map)) ) - printk(XENLOG_G_WARNING - "d%d: Forcing read-only access to MFN %lx\n", - l1e_owner->domain_id, mfn); - else - rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, - print_mmio_emul_range, - &(struct mmio_emul_range_ctxt){ - .d = l1e_owner, - .mfn = mfn }); + const unsigned long *ro_map; + unsigned int seg, bdf; + + if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || + ((ro_map = pci_get_ro_map(seg)) != NULL && + test_bit(bdf, ro_map)) ) + printk(XENLOG_G_WARNING + "d%d: Forcing read-only access to MFN %lx\n", + l1e_owner->domain_id, mfn); + else + rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, + print_mmio_emul_range, + &(struct mmio_emul_range_ctxt){ + .d = l1e_owner, + .mfn = mfn }); #endif - return 1; + flip = _PAGE_RW; + } + + switch ( l1f & PAGE_CACHE_ATTRS ) + { + case 0: /* WB */ + flip |= _PAGE_PWT | _PAGE_PCD; + break; + case _PAGE_PWT: /* WT */ + case _PAGE_PWT | _PAGE_PAT: /* WP */ + flip |= _PAGE_PCD | (l1f & _PAGE_PAT); + break; + } + + return flip; } if ( unlikely( (real_pg_owner != pg_owner) && @@ -1006,7 +1051,9 @@ rc = get_page_and_type_from_pagenr( l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1); - if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) + if ( unlikely(rc == -EINVAL) && + !is_pv_32bit_domain(d) && + get_l3_linear_pagetable(l3e, pfn, d) ) rc = 0; return rc; @@ -1240,8 +1287,9 @@ goto fail; case 0: break; - case 1: - l1e_remove_flags(pl1e[i], _PAGE_RW); + case _PAGE_RW ... 
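get_page_from_l1e() now returns a set of PTE bits to toggle ("flip") rather than a boolean, so one return value can both strip _PAGE_RW and force an MMIO mapping to uncacheable; callers apply it with an XOR (the `case _PAGE_RW ...` range continuing below). A sketch of the flip computation and its application, mirroring the switch in the hunk above:

```c
#include <stdio.h>

#define _PAGE_RW  0x002u
#define _PAGE_PWT 0x008u
#define _PAGE_PCD 0x010u
#define _PAGE_PAT 0x080u
#define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)

/* Return the PTE bits to toggle so a WB/WT/WP mapping becomes UC(-),
 * optionally dropping write access too; 0 means the PTE is fine as-is. */
static unsigned int mmio_flip(unsigned int l1f, int force_readonly)
{
    unsigned int flip = force_readonly ? (l1f & _PAGE_RW) : 0;

    switch ( l1f & PAGE_CACHE_ATTRS )
    {
    case 0:                     /* WB: set PWT+PCD */
        flip |= _PAGE_PWT | _PAGE_PCD;
        break;
    case _PAGE_PWT:             /* WT */
    case _PAGE_PWT | _PAGE_PAT: /* WP */
        flip |= _PAGE_PCD | (l1f & _PAGE_PAT);
        break;
    }

    return flip;
}

int main(void)
{
    unsigned int l1f = _PAGE_RW;           /* writable, WB */
    unsigned int flip = mmio_flip(l1f, 1);

    printf("%#x ^ %#x = %#x\n", l1f, flip, l1f ^ flip); /* RO, UC- */
    return 0;
}
```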
_PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(pl1e[i], ret); break; } @@ -1449,13 +1497,20 @@ l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty(); } -void fill_ro_mpt(unsigned long mfn) +bool_t fill_ro_mpt(unsigned long mfn) { l4_pgentry_t *l4tab = map_domain_page(_mfn(mfn)); + bool_t ret = 0; - l4tab[l4_table_offset(RO_MPT_VIRT_START)] = - idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]; + if ( !l4e_get_intpte(l4tab[l4_table_offset(RO_MPT_VIRT_START)]) ) + { + l4tab[l4_table_offset(RO_MPT_VIRT_START)] = + idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]; + ret = 1; + } unmap_domain_page(l4tab); + + return ret; } void zap_ro_mpt(unsigned long mfn) @@ -1513,10 +1568,15 @@ adjust_guest_l4e(pl4e[i], d); } - init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict)); + if ( rc >= 0 ) + { + init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict)); + atomic_inc(&d->arch.pv_domain.nr_l4_pages); + rc = 0; + } unmap_domain_page(pl4e); - return rc > 0 ? 0 : rc; + return rc; } static void free_l1_table(struct page_info *page) @@ -1634,7 +1694,13 @@ unmap_domain_page(pl4e); - return rc > 0 ? 0 : rc; + if ( rc >= 0 ) + { + atomic_dec(&d->arch.pv_domain.nr_l4_pages); + rc = 0; + } + + return rc; } int page_lock(struct page_info *page) @@ -1716,6 +1782,14 @@ _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ (_m), (_v), (_ad)) +/* + * PTE flags that a guest may change without re-validating the PTE. + * All other bits affect translation, caching, or Xen's safety. + */ +#define FASTPATH_FLAG_WHITELIST \ + (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \ + _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER) + /* Update the L1 entry at pl1e to new value nl1e. */ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, unsigned long gl1mfn, int preserve_ad, @@ -1756,8 +1830,8 @@ return -EINVAL; } - /* Fast path for identical mapping, r/w and presence. */ - if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) + /* Fast path for sufficiently-similar mappings. */ + if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) ) { adjust_guest_l1e(nl1e, pt_dom); if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, @@ -1780,8 +1854,9 @@ return rc; case 0: break; - case 1: - l1e_remove_flags(nl1e, _PAGE_RW); + case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(nl1e, rc); rc = 0; break; } @@ -1838,8 +1913,8 @@ return -EINVAL; } - /* Fast path for identical mapping and presence. */ - if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) ) + /* Fast path for sufficiently-similar mappings. */ + if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) ) { adjust_guest_l2e(nl2e, d); if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) @@ -1904,8 +1979,8 @@ return -EINVAL; } - /* Fast path for identical mapping and presence. */ - if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) ) + /* Fast path for sufficiently-similar mappings. */ + if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) ) { adjust_guest_l3e(nl3e, d); rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad); @@ -1968,8 +2043,8 @@ return -EINVAL; } - /* Fast path for identical mapping and presence. */ - if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) ) + /* Fast path for sufficiently-similar mappings. 
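The fast-path comment above is completed below; the idea is that only bits in FASTPATH_FLAG_WHITELIST may change without revalidating the entry, tested by XOR-ing old and new entries against the complement of the whitelist. A reduced model (a subset of the patch's whitelist, with NX/AVAIL_HIGH omitted so the constants stay within 32 bits):

```c
#include <stdint.h>
#include <stdio.h>

#define _PAGE_RW       0x002u
#define _PAGE_USER     0x004u
#define _PAGE_ACCESSED 0x020u
#define _PAGE_DIRTY    0x040u
#define _PAGE_GLOBAL   0x100u
#define _PAGE_AVAIL    0xe00u

/* Flags a guest may change on the fast path without revalidation. */
#define FASTPATH_FLAG_WHITELIST \
    (_PAGE_AVAIL | _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER)

/* Any differing bit outside the whitelist forces full revalidation. */
static int needs_revalidation(uint64_t ol1e, uint64_t nl1e)
{
    return ((ol1e ^ nl1e) & ~(uint64_t)FASTPATH_FLAG_WHITELIST) != 0;
}

int main(void)
{
    uint64_t ol1e = 0x12345000u | _PAGE_RW | 0x1u /* present */;

    printf("%d\n", needs_revalidation(ol1e, ol1e | _PAGE_ACCESSED)); /* 0 */
    printf("%d\n", needs_revalidation(ol1e, ol1e ^ _PAGE_RW));       /* 1 */
    return 0;
}
```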
*/ + if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) ) { adjust_guest_l4e(nl4e, d); rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad); @@ -2618,6 +2693,9 @@ ASSERT(opt_allow_superpage); + if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) + return -EINVAL; + spage = mfn_to_spage(mfn); y = spage->type_info; do { @@ -3229,8 +3307,9 @@ case MMUEXT_INVLPG_LOCAL: if ( unlikely(d != pg_owner) ) rc = -EPERM; - else if ( !paging_mode_enabled(d) || - paging_invlpg(curr, op.arg1.linear_addr) != 0 ) + else if ( !paging_mode_enabled(d) + ? __addr_ok(op.arg1.linear_addr) + : paging_invlpg(curr, op.arg1.linear_addr) ) flush_tlb_one_local(op.arg1.linear_addr); break; @@ -3251,7 +3330,7 @@ if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) flush_tlb_mask(&pmask); - else + else if ( __addr_ok(op.arg1.linear_addr) ) flush_tlb_one_mask(&pmask, op.arg1.linear_addr); break; } @@ -3264,10 +3343,10 @@ break; case MMUEXT_INVLPG_ALL: - if ( likely(d == pg_owner) ) - flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); - else + if ( unlikely(d != pg_owner) ) rc = -EPERM; + else if ( __addr_ok(op.arg1.linear_addr) ) + flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); break; case MMUEXT_FLUSH_CACHE: @@ -3395,42 +3474,26 @@ } case MMUEXT_MARK_SUPER: + case MMUEXT_UNMARK_SUPER: { unsigned long mfn = op.arg1.mfn; - if ( unlikely(d != pg_owner) ) - rc = -EPERM; - else if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) - { - MEM_LOG("Unaligned superpage reference mfn %lx", mfn); - okay = 0; - } - else if ( !opt_allow_superpage ) + if ( !opt_allow_superpage ) { MEM_LOG("Superpages disallowed"); rc = -ENOSYS; } - else - rc = mark_superpage(mfn_to_spage(mfn), d); - break; - } - - case MMUEXT_UNMARK_SUPER: - { - unsigned long mfn = op.arg1.mfn; - - if ( unlikely(d != pg_owner) ) + else if ( unlikely(d != pg_owner) ) rc = -EPERM; - else if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + else if ( mfn & (L1_PAGETABLE_ENTRIES - 1) ) { MEM_LOG("Unaligned superpage reference mfn %lx", mfn); - okay = 0; - } - else if ( !opt_allow_superpage ) - { - MEM_LOG("Superpages disallowed"); - rc = -ENOSYS; + rc = -EINVAL; } + else if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) + rc = -EINVAL; + else if ( op.cmd == MMUEXT_MARK_SUPER ) + rc = mark_superpage(mfn_to_spage(mfn), d); else rc = unmark_superpage(mfn_to_spage(mfn)); break; @@ -4420,16 +4483,17 @@ void destroy_gdt(struct vcpu *v) { l1_pgentry_t *pl1e; - int i; - unsigned long pfn; + unsigned int i; + unsigned long pfn, zero_pfn = PFN_DOWN(__pa(zero_page)); v->arch.pv_vcpu.gdt_ents = 0; pl1e = gdt_ldt_ptes(v->domain, v); for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) { - if ( (pfn = l1e_get_pfn(pl1e[i])) != 0 ) + pfn = l1e_get_pfn(pl1e[i]); + if ( (l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) && pfn != zero_pfn ) put_page_and_type(mfn_to_page(pfn)); - l1e_write(&pl1e[i], l1e_empty()); + l1e_write(&pl1e[i], l1e_from_pfn(zero_pfn, __PAGE_HYPERVISOR_RO)); v->arch.pv_vcpu.gdt_frames[i] = 0; } } @@ -4442,7 +4506,7 @@ struct domain *d = v->domain; l1_pgentry_t *pl1e; /* NB. There are 512 8-byte entries per GDT page. 
*/ - int i, nr_pages = (entries + 511) / 512; + unsigned int i, nr_pages = (entries + 511) / 512; if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; @@ -4980,10 +5044,11 @@ unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - unsigned int rc; + unsigned int rc = bytes; unsigned long addr = offset; - if ( (rc = copy_from_user(p_data, (void *)addr, bytes)) != 0 ) + if ( !__addr_ok(addr) || + (rc = __copy_from_user(p_data, (void *)addr, bytes)) ) { propagate_page_fault(addr + bytes - rc, 0); /* read fault */ return X86EMUL_EXCEPTION; @@ -5006,6 +5071,7 @@ l1_pgentry_t pte, ol1e, nl1e, *pl1e; struct vcpu *v = current; struct domain *d = v->domain; + int ret; /* Only allow naturally-aligned stores within the original %cr2 page. */ if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) ) @@ -5053,7 +5119,7 @@ /* Check the new PTE. */ nl1e = l1e_from_intpte(val); - switch ( get_page_from_l1e(nl1e, d, d) ) + switch ( ret = get_page_from_l1e(nl1e, d, d) ) { default: if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && @@ -5077,8 +5143,9 @@ break; case 0: break; - case 1: - l1e_remove_flags(nl1e, _PAGE_RW); + case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(nl1e, ret); break; } @@ -5130,7 +5197,7 @@ { paddr_t val = 0; - if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes -1)) ) + if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes ) { MEM_LOG("ptwr_emulate: bad write size (addr=%lx, bytes=%u)", offset, bytes); @@ -5236,34 +5303,18 @@ * fault handling for read-only MMIO pages */ -struct mmio_ro_emulate_ctxt { - struct x86_emulate_ctxt ctxt; - unsigned long cr2; - unsigned int seg, bdf; -}; - -static int mmio_ro_emulated_read( - enum x86_segment seg, - unsigned long offset, - void *p_data, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) -{ - return X86EMUL_UNHANDLEABLE; -} - -static int mmio_ro_emulated_write( +int mmio_ro_emulated_write( enum x86_segment seg, unsigned long offset, void *p_data, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = - container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt); + struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data; /* Only allow naturally-aligned stores at the original %cr2 address. */ - if ( ((bytes | offset) & (bytes - 1)) || offset != mmio_ro_ctxt->cr2 ) + if ( ((bytes | offset) & (bytes - 1)) || !bytes || + offset != mmio_ro_ctxt->cr2 ) { MEM_LOG("mmio_ro_emulate: bad access (cr2=%lx, addr=%lx, bytes=%u)", mmio_ro_ctxt->cr2, offset, bytes); @@ -5274,47 +5325,46 @@ } static const struct x86_emulate_ops mmio_ro_emulate_ops = { - .read = mmio_ro_emulated_read, + .read = x86emul_unhandleable_rw, .insn_fetch = ptwr_emulated_read, .write = mmio_ro_emulated_write, }; -static int mmio_intercept_write( +int mmcfg_intercept_write( enum x86_segment seg, unsigned long offset, void *p_data, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - struct mmio_ro_emulate_ctxt *mmio_ctxt = - container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt); + struct mmio_ro_emulate_ctxt *mmio_ctxt = ctxt->data; /* * Only allow naturally-aligned stores no wider than 4 bytes to the * original %cr2 address. 
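Several hunks above add an explicit !bytes to the usual alignment idiom. A sketch of the predicate (aligned_store_ok() is an illustrative name; the real code open-codes the expression):

    #include <stdbool.h>

    /*
     * For power-of-two sizes, offset % bytes == 0 is
     * (offset & (bytes - 1)) == 0; folding bytes into the left-hand
     * side also rejects non-power-of-two sizes.  bytes == 0 needs the
     * separate test: 0 - 1 is all ones, so the mask expression alone
     * would wrongly accept a zero-length "store" at offset 0.
     */
    static bool aligned_store_ok(unsigned long offset, unsigned int bytes)
    {
        return bytes && !((bytes | offset) & (bytes - 1));
    }
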
*/ - if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 || + if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 || !bytes || offset != mmio_ctxt->cr2 ) { - MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)", + MEM_LOG("mmcfg_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)", mmio_ctxt->cr2, offset, bytes); return X86EMUL_UNHANDLEABLE; } offset &= 0xfff; - pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, offset, bytes, - p_data); - pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), - PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes, - *(uint32_t *)p_data); + if ( pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, + offset, bytes, p_data) >= 0 ) + pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), + PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes, + *(uint32_t *)p_data); return X86EMUL_OKAY; } -static const struct x86_emulate_ops mmio_intercept_ops = { - .read = mmio_ro_emulated_read, +static const struct x86_emulate_ops mmcfg_intercept_ops = { + .read = x86emul_unhandleable_rw, .insn_fetch = ptwr_emulated_read, - .write = mmio_intercept_write, + .write = mmcfg_intercept_write, }; /* Check if guest is trying to modify a r/o MMIO page. */ @@ -5324,14 +5374,14 @@ l1_pgentry_t pte; unsigned long mfn; unsigned int addr_size = is_pv_32bit_vcpu(v) ? 32 : BITS_PER_LONG; - struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { - .ctxt.regs = regs, - .ctxt.addr_size = addr_size, - .ctxt.sp_size = addr_size, - .ctxt.swint_emulate = x86_swint_emulate_none, - .cr2 = addr + struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = addr }; + struct x86_emulate_ctxt ctxt = { + .regs = regs, + .addr_size = addr_size, + .sp_size = addr_size, + .swint_emulate = x86_swint_emulate_none, + .data = &mmio_ro_ctxt }; - const unsigned long *ro_map; int rc; /* Attempt to read the PTE that maps the VA being accessed. */ @@ -5356,12 +5406,10 @@ if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) return 0; - if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) && - ((ro_map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL || - !test_bit(mmio_ro_ctxt.bdf, ro_map)) ) - rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops); + if ( pci_ro_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) ) + rc = x86_emulate(&ctxt, &mmcfg_intercept_ops); else - rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops); + rc = x86_emulate(&ctxt, &mmio_ro_emulate_ops); return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0; } @@ -5515,7 +5563,12 @@ flush_flags |= FLUSH_TLB_GLOBAL; \ if ( (flags & _PAGE_PRESENT) && \ (((o_) ^ flags) & PAGE_CACHE_ATTRS) ) \ + { \ flush_flags |= FLUSH_CACHE; \ + if ( virt >= DIRECTMAP_VIRT_START && \ + virt < HYPERVISOR_VIRT_END ) \ + flush_flags |= FLUSH_VA_VALID; \ + } \ } while (0) while ( nr_mfns != 0 ) diff -Nru xen-4.6.0/xen/arch/x86/mpparse.c xen-4.6.5/xen/arch/x86/mpparse.c --- xen-4.6.0/xen/arch/x86/mpparse.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/mpparse.c 2017-03-07 16:19:05.000000000 +0000 @@ -89,19 +89,14 @@ void __init set_nr_sockets(void) { - /* - * Count the actual cpus in the socket 0 and use it to calculate nr_sockets - * so that the latter will be always >= the actual socket number in the - * system even when APIC IDs from MP table are too sparse. 
- */ - unsigned int cpus = bitmap_weight(phys_cpu_present_map.mask, - boot_cpu_data.x86_max_cores * - boot_cpu_data.x86_num_siblings); - - if ( cpus == 0 ) - cpus = 1; - - nr_sockets = DIV_ROUND_UP(num_processors + disabled_cpus, cpus); + nr_sockets = last_physid(phys_cpu_present_map) + / boot_cpu_data.x86_max_cores + / boot_cpu_data.x86_num_siblings + 1; + if (disabled_cpus) + nr_sockets += (disabled_cpus - 1) + / boot_cpu_data.x86_max_cores + / boot_cpu_data.x86_num_siblings + 1; + printk(XENLOG_DEBUG "nr_sockets: %u\n", nr_sockets); } /* diff -Nru xen-4.6.0/xen/arch/x86/msi.c xen-4.6.5/xen/arch/x86/msi.c --- xen-4.6.0/xen/arch/x86/msi.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/msi.c 2017-03-07 16:19:05.000000000 +0000 @@ -160,42 +160,37 @@ */ void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg) { - unsigned dest; - memset(msg, 0, sizeof(*msg)); - if ( !cpumask_intersects(cpu_mask, &cpu_online_map) ) - { - dprintk(XENLOG_ERR,"%s, compose msi message error!!\n", __func__); + + if ( vector < FIRST_DYNAMIC_VECTOR ) return; - } - if ( vector ) + if ( cpu_mask ) { cpumask_t *mask = this_cpu(scratch_mask); - cpumask_and(mask, cpu_mask, &cpu_online_map); - dest = cpu_mask_to_apicid(mask); + if ( !cpumask_intersects(cpu_mask, &cpu_online_map) ) + return; - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DESTMODE_PHYS: - MSI_ADDR_DESTMODE_LOGIC) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - msg->dest32 = dest; - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); + cpumask_and(mask, cpu_mask, &cpu_online_map); + msg->dest32 = cpu_mask_to_apicid(mask); } + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | + (INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC + : MSI_ADDR_DESTMODE_PHYS) | + ((INT_DELIVERY_MODE != dest_LowestPrio) + ? MSI_ADDR_REDIRECTION_CPU + : MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(msg->dest32); + + msg->data = MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) + ? 
MSI_DATA_DELIVERY_FIXED + : MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(vector); } static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) @@ -434,8 +429,13 @@ { writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + if ( likely(control & PCI_MSIX_FLAGS_ENABLE) ) break; + + entry->msi_attrib.host_masked = host; + entry->msi_attrib.guest_masked = guest; + flag = 1; } else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) ) @@ -1292,17 +1292,17 @@ PCI_CAP_ID_MSIX); ASSERT(pos); - if ( reg < pos || reg >= msix_pba_offset_reg(pos) + 4 ) - return 0; - - if ( reg != msix_control_reg(pos) || size != 2 ) - return -EACCES; + if ( reg >= pos && reg < msix_pba_offset_reg(pos) + 4 ) + { + if ( reg != msix_control_reg(pos) || size != 2 ) + return -EACCES; - pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL); - if ( pdev->msix->host_maskall ) - *data |= PCI_MSIX_FLAGS_MASKALL; + pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL); + if ( pdev->msix->host_maskall ) + *data |= PCI_MSIX_FLAGS_MASKALL; - return 1; + return 1; + } } entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); diff -Nru xen-4.6.0/xen/arch/x86/numa.c xen-4.6.5/xen/arch/x86/numa.c --- xen-4.6.0/xen/arch/x86/numa.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/numa.c 2017-03-07 16:19:05.000000000 +0000 @@ -349,18 +349,32 @@ u32 apicid = x86_cpu_to_apicid[i]; if ( apicid == BAD_APICID ) continue; - node = apicid_to_node[apicid]; + node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE; if ( node == NUMA_NO_NODE || !node_online(node) ) node = 0; numa_set_node(i, node); } } -EXPORT_SYMBOL(cpu_to_node); -EXPORT_SYMBOL(node_to_cpumask); -EXPORT_SYMBOL(memnode_shift); -EXPORT_SYMBOL(memnodemap); -EXPORT_SYMBOL(node_data); +unsigned int __init arch_get_dma_bitsize(void) +{ + unsigned int node; + + for_each_online_node(node) + if ( node_spanned_pages(node) && + !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) ) + break; + if ( node >= MAX_NUMNODES ) + panic("No node with memory below 4Gb"); + + /* + * Try to not reserve the whole node's memory for DMA, but dividing + * its spanned pages by (arbitrarily chosen) 4. + */ + return min_t(unsigned int, + flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1) + + PAGE_SHIFT, 32); +} static void dump_numa(unsigned char key) { diff -Nru xen-4.6.0/xen/arch/x86/setup.c xen-4.6.5/xen/arch/x86/setup.c --- xen-4.6.0/xen/arch/x86/setup.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/setup.c 2017-03-07 16:19:05.000000000 +0000 @@ -67,6 +67,8 @@ static bool_t __initdata disable_smap; invbool_param("smap", disable_smap); +unsigned long __read_mostly cr4_pv32_mask; + /* Boot dom0 in pvh mode */ static bool_t __initdata opt_dom0pvh; boolean_param("dom0pvh", opt_dom0pvh); @@ -200,7 +202,7 @@ nodeid_t node; u32 apicid = x86_cpu_to_apicid[cpu]; - node = apicid_to_node[apicid]; + node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE; if ( node == NUMA_NO_NODE ) node = 0; @@ -625,7 +627,7 @@ if ( cpu_has_efer ) rdmsrl(MSR_EFER, this_cpu(efer)); - asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) ); + asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) ); /* We initialise the serial devices very early so we can get debugging. 
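The new arch_get_dma_bitsize() above derives a DMA address width from the lowest node that has memory below 4GiB: cover the node's start plus a quarter of its span, convert pages to an address width, and clamp to 32 bits. A self-contained sketch, assuming a 64-bit unsigned long and using a GCC builtin in place of Xen's flsl() (find-last-set, 1-based):

    #define PAGE_SHIFT 12

    /* 1-based index of the most significant set bit; 0 for x == 0. */
    static unsigned int flsl(unsigned long x)
    {
        return x ? 64 - __builtin_clzl(x) : 0;
    }

    static unsigned int dma_bitsize(unsigned long start_pfn,
                                    unsigned long spanned_pages)
    {
        /* Highest pfn to cover: start + span/4 (arbitrarily chosen). */
        unsigned int bits = flsl(start_pfn + spanned_pages / 4 - 1)
                            + PAGE_SHIFT;

        return bits < 32 ? bits : 32;
    }
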
*/ ns16550.io_base = 0x3f8; @@ -1304,6 +1306,8 @@ if ( cpu_has_smap ) set_in_cr4(X86_CR4_SMAP); + cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS; + if ( cpu_has_fsgsbase ) set_in_cr4(X86_CR4_FSGSBASE); @@ -1440,7 +1444,10 @@ * copy_from_user(). */ if ( cpu_has_smap ) + { + cr4_pv32_mask &= ~X86_CR4_SMAP; write_cr4(read_cr4() & ~X86_CR4_SMAP); + } printk("%sNX (Execute Disable) protection %sactive\n", cpu_has_nx ? XENLOG_INFO : XENLOG_WARNING "Warning: ", @@ -1457,7 +1464,10 @@ panic("Could not set up DOM0 guest OS"); if ( cpu_has_smap ) + { write_cr4(read_cr4() | X86_CR4_SMAP); + cr4_pv32_mask |= X86_CR4_SMAP; + } /* Scrub RAM that is still free and so may go to an unprivileged domain. */ scrub_heap_pages(); diff -Nru xen-4.6.0/xen/arch/x86/smpboot.c xen-4.6.5/xen/arch/x86/smpboot.c --- xen-4.6.0/xen/arch/x86/smpboot.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/smpboot.c 2017-03-07 16:19:05.000000000 +0000 @@ -993,7 +993,8 @@ cpu = node; goto out; } - apicid_to_node[apic_id] = node; + if ( apic_id < MAX_LOCAL_APIC ) + apicid_to_node[apic_id] = node; } /* Physically added CPUs do not have synchronised TSC. */ diff -Nru xen-4.6.0/xen/arch/x86/srat.c xen-4.6.5/xen/arch/x86/srat.c --- xen-4.6.0/xen/arch/x86/srat.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/srat.c 2017-03-07 16:19:05.000000000 +0000 @@ -209,7 +209,6 @@ { unsigned pxm; nodeid_t node; - u32 apic_id; if (srat_disabled()) return; @@ -217,8 +216,13 @@ bad_srat(); return; } - if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + if (!(pa->flags & ACPI_SRAT_CPU_ENABLED)) + return; + if (pa->apic_id >= MAX_LOCAL_APIC) { + printk(KERN_INFO "SRAT: APIC %08x ignored\n", pa->apic_id); return; + } + pxm = pa->proximity_domain; node = setup_node(pxm); if (node == NUMA_NO_NODE) { @@ -226,11 +230,11 @@ return; } - apic_id = pa->apic_id; - apicid_to_node[apic_id] = node; + apicid_to_node[pa->apic_id] = node; + node_set(node, processor_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", - pxm, apic_id, node); + printk(KERN_INFO "SRAT: PXM %u -> APIC %08x -> Node %u\n", + pxm, pa->apic_id, node); } /* Callback for Proximity Domain -> LAPIC mapping */ @@ -262,7 +266,7 @@ apicid_to_node[pa->apic_id] = node; node_set(node, processor_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC %02x -> Node %u\n", pxm, pa->apic_id, node); } diff -Nru xen-4.6.0/xen/arch/x86/tboot.c xen-4.6.5/xen/arch/x86/tboot.c --- xen-4.6.0/xen/arch/x86/tboot.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/tboot.c 2017-03-07 16:19:05.000000000 +0000 @@ -229,9 +229,10 @@ if ( !is_idle_domain(d) ) { - struct hvm_iommu *hd = domain_hvm_iommu(d); - update_iommu_mac(&ctx, hd->arch.pgd_maddr, - agaw_to_level(hd->arch.agaw)); + const struct domain_iommu *dio = dom_iommu(d); + + update_iommu_mac(&ctx, dio->arch.pgd_maddr, + agaw_to_level(dio->arch.agaw)); } } diff -Nru xen-4.6.0/xen/arch/x86/time.c xen-4.6.5/xen/arch/x86/time.c --- xen-4.6.0/xen/arch/x86/time.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/time.c 2017-03-07 16:19:05.000000000 +0000 @@ -1757,7 +1757,12 @@ u64 gtime_to_gtsc(struct domain *d, u64 time) { if ( !is_hvm_domain(d) ) - time = max_t(s64, time - d->arch.vtsc_offset, 0); + { + if ( time < d->arch.vtsc_offset ) + return -scale_delta(d->arch.vtsc_offset - time, + &d->arch.ns_to_vtsc); + time -= d->arch.vtsc_offset; + } return scale_delta(time, &d->arch.ns_to_vtsc); } @@ -1979,7 
+1984,7 @@ break; } d->arch.incarnation = incarnation + 1; - if ( is_hvm_domain(d) ) + if ( has_hvm_container_domain(d) ) { hvm_set_rdtsc_exiting(d, d->arch.vtsc); if ( d->vcpu && d->vcpu[0] && incarnation == 0 ) diff -Nru xen-4.6.0/xen/arch/x86/traps.c xen-4.6.5/xen/arch/x86/traps.c --- xen-4.6.0/xen/arch/x86/traps.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/traps.c 2017-03-07 16:19:05.000000000 +0000 @@ -808,6 +808,11 @@ break; case 4: + if ( !has_hvm_container_domain(currd) ) + { + *eax = *ebx = *ecx = *edx = 0; + break; + } hvm_hypervisor_cpuid_leaf(sub_idx, eax, ebx, ecx, edx); break; @@ -967,6 +972,7 @@ __clear_bit(X86_FEATURE_LWP % 32, &c); __clear_bit(X86_FEATURE_NODEID_MSR % 32, &c); __clear_bit(X86_FEATURE_TOPOEXT % 32, &c); + __clear_bit(X86_FEATURE_MWAITX % 32, &c); break; case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */ @@ -2514,19 +2520,22 @@ switch ( regs->_ecx ) { case MSR_FS_BASE: - if ( is_pv_32bit_domain(currd) ) + if ( is_pv_32bit_domain(currd) || + !is_canonical_address(msr_content) ) goto fail; wrfsbase(msr_content); v->arch.pv_vcpu.fs_base = msr_content; break; case MSR_GS_BASE: - if ( is_pv_32bit_domain(currd) ) + if ( is_pv_32bit_domain(currd) || + !is_canonical_address(msr_content) ) goto fail; wrgsbase(msr_content); v->arch.pv_vcpu.gs_base_kernel = msr_content; break; case MSR_SHADOW_GS_BASE: - if ( is_pv_32bit_domain(currd) ) + if ( is_pv_32bit_domain(currd) || + !is_canonical_address(msr_content) ) goto fail; if ( wrmsr_safe(MSR_SHADOW_GS_BASE, msr_content) ) goto fail; @@ -2647,6 +2656,14 @@ if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK ) wrmsrl(regs->_ecx, msr_content); break; + + case MSR_INTEL_PLATFORM_INFO: + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + msr_content || + rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) ) + goto fail; + break; + case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7): case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3): case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2: @@ -2663,8 +2680,8 @@ if ( vpmu_do_wrmsr(regs->ecx, msr_content, 0) ) goto fail; + break; } - break; } /*FALLTHROUGH*/ @@ -2774,6 +2791,14 @@ /* No extra capabilities are supported */ regs->eax = regs->edx = 0; break; + + case MSR_INTEL_PLATFORM_INFO: + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) ) + goto fail; + regs->eax = regs->edx = 0; + break; + case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7): case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3): case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2: @@ -2798,8 +2823,8 @@ regs->eax = (uint32_t)val; regs->edx = (uint32_t)(val >> 32); + break; } - break; } /*FALLTHROUGH*/ @@ -2967,7 +2992,7 @@ sib = insn_fetch(u8, base, eip, limit); modrm = (modrm & ~7) | (sib & 7); - if ( (sib >>= 3) != 4 ) + if ( ((sib >>= 3) & 7) != 4 ) opnd_off = *(unsigned long *) decode_register(sib & 7, regs, 0); opnd_off <<= sib >> 3; @@ -3027,7 +3052,10 @@ opnd_off += insn_fetch(s8, base, eip, limit); break; case 0x80: - opnd_off += insn_fetch(s32, base, eip, limit); + if ( ad_bytes > 2 ) + opnd_off += insn_fetch(s32, base, eip, limit); + else + opnd_off += insn_fetch(s16, base, eip, limit); break; } if ( ad_bytes == 4 ) @@ -3064,8 +3092,7 @@ #define ad_default ad_bytes opnd_sel = insn_fetch(u16, base, opnd_off, limit); #undef ad_default - ASSERT((opnd_sel & ~3) == regs->error_code); - if ( dpl < (opnd_sel & 3) ) + if ( (opnd_sel & ~3) != regs->error_code || dpl < (opnd_sel & 3) ) { do_guest_trap(TRAP_gp_fault, regs, 1); return; diff -Nru 
xen-4.6.0/xen/arch/x86/x86_64/asm-offsets.c xen-4.6.5/xen/arch/x86/x86_64/asm-offsets.c --- xen-4.6.0/xen/arch/x86/x86_64/asm-offsets.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_64/asm-offsets.c 2017-03-07 16:19:05.000000000 +0000 @@ -135,6 +135,7 @@ OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id); OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); + OFFSET(CPUINFO_cr4, struct cpu_info, cr4); DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); BLANK(); diff -Nru xen-4.6.0/xen/arch/x86/x86_64/compat/entry.S xen-4.6.5/xen/arch/x86/x86_64/compat/entry.S --- xen-4.6.0/xen/arch/x86/x86_64/compat/entry.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_64/compat/entry.S 2017-03-07 16:19:05.000000000 +0000 @@ -16,14 +16,16 @@ ASM_CLAC pushq $0 SAVE_VOLATILE type=TRAP_syscall compat=1 + CR4_PV32_RESTORE cmpb $0,untrusted_msi(%rip) UNLIKELY_START(ne, msi_check) movl $HYPERCALL_VECTOR,%edi call check_for_unexpected_msi - LOAD_C_CLOBBERED + LOAD_C_CLOBBERED compat=1 ax=0 UNLIKELY_END(msi_check) + movl UREGS_rax(%rsp),%eax GET_CURRENT(%rbx) cmpl $NR_hypercalls,%eax @@ -33,7 +35,6 @@ pushq UREGS_rbx(%rsp); pushq %rcx; pushq %rdx; pushq %rsi; pushq %rdi pushq UREGS_rbp+5*8(%rsp) leaq compat_hypercall_args_table(%rip),%r10 - movl %eax,%eax movl $6,%ecx subb (%r10,%rax,1),%cl movq %rsp,%rdi @@ -48,7 +49,6 @@ #define SHADOW_BYTES 16 /* Shadow EIP + shadow hypercall # */ #else /* Relocate argument registers and zero-extend to 64 bits. */ - movl %eax,%eax /* Hypercall # */ xchgl %ecx,%esi /* Arg 2, Arg 4 */ movl %edx,%edx /* Arg 3 */ movl %edi,%r8d /* Arg 5 */ @@ -174,6 +174,45 @@ /* %rbx: struct vcpu, interrupts disabled */ ENTRY(compat_restore_all_guest) ASSERT_INTERRUPTS_DISABLED + mov $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11d + and UREGS_eflags(%rsp),%r11d +.Lcr4_orig: + .skip .Lcr4_alt_end - .Lcr4_alt, 0x90 +.Lcr4_orig_end: + .pushsection .altinstr_replacement, "ax" +.Lcr4_alt: + testb $3,UREGS_cs(%rsp) + jpe .Lcr4_alt_end + mov CPUINFO_cr4-CPUINFO_guest_cpu_user_regs(%rsp), %rax + and $~XEN_CR4_PV32_BITS, %rax +1: + mov %rax, CPUINFO_cr4-CPUINFO_guest_cpu_user_regs(%rsp) + mov %rax, %cr4 + /* + * An NMI or MCE may have occurred between the previous two + * instructions, leaving register and cache in a state where + * the next exit from the guest would trigger the BUG in + * cr4_pv32_restore. If this happened, the cached value is no + * longer what we just set it to, which we can utilize to + * correct that state. Note that we do not have to fear this + * loop to cause a live lock: If NMIs/MCEs occurred at that + * high a rate, we'd be live locked anyway. + */ + cmp %rax, CPUINFO_cr4-CPUINFO_guest_cpu_user_regs(%rsp) + jne 1b +.Lcr4_alt_end: + .section .altinstructions, "a" + altinstruction_entry .Lcr4_orig, .Lcr4_orig, X86_FEATURE_ALWAYS, \ + (.Lcr4_orig_end - .Lcr4_orig), 0 + altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMEP, \ + (.Lcr4_orig_end - .Lcr4_orig), \ + (.Lcr4_alt_end - .Lcr4_alt) + altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMAP, \ + (.Lcr4_orig_end - .Lcr4_orig), \ + (.Lcr4_alt_end - .Lcr4_alt) + .popsection + or $X86_EFLAGS_IF,%r11 + mov %r11d,UREGS_eflags(%rsp) RESTORE_ALL adj=8 compat=1 .Lft0: iretq @@ -210,6 +249,38 @@ _ASM_PRE_EXTABLE(.Lft0, .Lfx0) _ASM_EXTABLE(.Ldf0, compat_failsafe_callback) +/* This mustn't modify registers other than %rax. 
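The retry loop in the alternative above (around the mov %rax, %cr4) closes the race spelled out in its comment: an NMI or #MC between updating the cached CR4 and writing the register leaves the two out of sync, and since the NMI/#MC exit path rewrites both, a mismatch afterwards reveals that it happened. A rough C model of the idea, illustrative only (the real code must stay in assembly with fixed register usage):

    /* cached_cr4 stands in for the per-CPU cpu_info->cr4 mirror. */
    extern volatile unsigned long cached_cr4;
    extern void write_cr4_hw(unsigned long val);   /* mov to %cr4 */

    static void write_cr4_raceproof(unsigned long val)
    {
        do {
            cached_cr4 = val;
            write_cr4_hw(val);
            /* An NMI here may have changed both copies; retry if so. */
        } while ( cached_cr4 != val );
    }
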
*/ +ENTRY(cr4_pv32_restore) + push %rdx + GET_CPUINFO_FIELD(cr4, %rdx) + mov (%rdx), %rax + test $XEN_CR4_PV32_BITS, %eax + jnz 0f + or cr4_pv32_mask(%rip), %rax + mov %rax, %cr4 + mov %rax, (%rdx) + pop %rdx + ret +0: +#ifndef NDEBUG + /* Check that _all_ of the bits intended to be set actually are. */ + mov %cr4, %rax + and cr4_pv32_mask(%rip), %eax + cmp cr4_pv32_mask(%rip), %eax + je 1f + /* Cause cr4_pv32_mask to be visible in the BUG register dump. */ + mov cr4_pv32_mask(%rip), %rdx + /* Avoid coming back here while handling the #UD we cause below. */ + mov %cr4, %rcx + or %rdx, %rcx + mov %rcx, %cr4 + BUG +1: +#endif + pop %rdx + xor %eax, %eax + ret + /* %rdx: trap_bounce, %rbx: struct vcpu */ ENTRY(compat_post_handle_exception) testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) @@ -222,6 +293,7 @@ /* See lstar_enter for entry register state. */ ENTRY(cstar_enter) sti + CR4_PV32_RESTORE movq 8(%rsp),%rax /* Restore %rax. */ movq $FLAT_KERNEL_SS,8(%rsp) pushq %r11 @@ -257,6 +329,7 @@ jmp .Lcompat_bounce_exception ENTRY(compat_sysenter) + CR4_PV32_RESTORE movq VCPU_trap_ctxt(%rbx),%rcx cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp) movzwl VCPU_sysenter_sel(%rbx),%eax @@ -270,6 +343,7 @@ jmp compat_test_all_events ENTRY(compat_int80_direct_trap) + CR4_PV32_RESTORE call compat_create_bounce_frame jmp compat_test_all_events @@ -280,6 +354,7 @@ compat_create_bounce_frame: ASSERT_INTERRUPTS_ENABLED mov %fs,%edi + ASM_STAC testb $2,UREGS_cs+8(%rsp) jz 1f /* Push new frame at registered guest-OS stack base. */ @@ -333,6 +408,7 @@ movl %ds,%eax .Lft12: movl %eax,%fs:0*4(%rsi) # DS UNLIKELY_END(compat_bounce_failsafe) + ASM_CLAC /* Rewrite our stack frame and return to guest-OS mode. */ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */ andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\ @@ -378,6 +454,7 @@ addl $4,%esi compat_crash_page_fault: .Lft14: mov %edi,%fs + ASM_CLAC movl %esi,%edi call show_page_walk jmp dom_crash_sync_extable diff -Nru xen-4.6.0/xen/arch/x86/x86_64/entry.S xen-4.6.5/xen/arch/x86/x86_64/entry.S --- xen-4.6.0/xen/arch/x86/x86_64/entry.S 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_64/entry.S 2017-03-07 16:19:05.000000000 +0000 @@ -40,28 +40,29 @@ testw $TRAP_syscall,4(%rsp) jz iret_exit_to_guest + movq 24(%rsp),%r11 # RFLAGS + andq $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11 + orq $X86_EFLAGS_IF,%r11 + /* Don't use SYSRET path if the return address is not canonical. */ movq 8(%rsp),%rcx sarq $47,%rcx incl %ecx cmpl $1,%ecx - ja .Lforce_iret + movq 8(%rsp),%rcx # RIP + ja iret_exit_to_guest cmpw $FLAT_USER_CS32,16(%rsp)# CS - movq 8(%rsp),%rcx # RIP - movq 24(%rsp),%r11 # RFLAGS movq 32(%rsp),%rsp # RSP je 1f sysretq 1: sysretl -.Lforce_iret: - /* Mimic SYSRET behavior. */ - movq 8(%rsp),%rcx # RIP - movq 24(%rsp),%r11 # RFLAGS ALIGN /* No special register assumptions. */ iret_exit_to_guest: + andl $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),24(%rsp) + orl $X86_EFLAGS_IF,24(%rsp) addq $8,%rsp .Lft0: iretq @@ -462,9 +463,11 @@ domain_crash_page_fault_8: addq $8,%rsi domain_crash_page_fault: + ASM_CLAC movq %rsi,%rdi call show_page_walk ENTRY(dom_crash_sync_extable) + ASM_CLAC # Get out of the guest-save area of the stack. 
GET_STACK_BASE(%rax) leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp @@ -480,6 +483,7 @@ ENTRY(common_interrupt) SAVE_ALL CLAC + CR4_PV32_RESTORE movq %rsp,%rdi callq do_IRQ jmp ret_from_intr @@ -500,13 +504,67 @@ GLOBAL(handle_exception) SAVE_ALL CLAC handle_exception_saved: + GET_CURRENT(%rbx) testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) jz exception_with_ints_disabled + +.Lcr4_pv32_orig: + jmp .Lcr4_pv32_done + .skip (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt) - (. - .Lcr4_pv32_orig), 0xcc + .pushsection .altinstr_replacement, "ax" +.Lcr4_pv32_alt: + mov VCPU_domain(%rbx),%rax +.Lcr4_pv32_alt_end: + .section .altinstructions, "a" + altinstruction_entry .Lcr4_pv32_orig, .Lcr4_pv32_alt, \ + X86_FEATURE_SMEP, \ + (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt), \ + (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt) + altinstruction_entry .Lcr4_pv32_orig, .Lcr4_pv32_alt, \ + X86_FEATURE_SMAP, \ + (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt), \ + (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt) + .popsection + + testb $3,UREGS_cs(%rsp) + jz .Lcr4_pv32_done + cmpb $0,DOMAIN_is_32bit_pv(%rax) + je .Lcr4_pv32_done + call cr4_pv32_restore + /* + * An NMI or #MC may occur between clearing CR4.SMEP / CR4.SMAP in + * compat_restore_all_guest and it actually returning to guest + * context, in which case the guest would run with the two features + * enabled. The only bad that can happen from this is a kernel mode + * #PF which the guest doesn't expect. Rather than trying to make the + * NMI/#MC exit path honor the intended CR4 setting, simply check + * whether the wrong CR4 was in use when the #PF occurred, and exit + * back to the guest (which will in turn clear the two CR4 bits) to + * re-execute the instruction. If we get back here, the CR4 bits + * should then be found clear (unless another NMI/#MC occurred at + * exactly the right time), and we'll continue processing the + * exception as normal. + */ + test %rax,%rax + jnz .Lcr4_pv32_done + /* + * The below effectively is + * if ( regs->entry_vector == TRAP_page_fault && + * (regs->error_code & PFEC_page_present) && + * !(regs->error_code & ~(PFEC_write_access|PFEC_insn_fetch)) ) + * goto compat_test_all_events; + */ + mov $PFEC_page_present,%al + cmpb $TRAP_page_fault,UREGS_entry_vector(%rsp) + jne .Lcr4_pv32_done + xor UREGS_error_code(%rsp),%eax + test $~(PFEC_write_access|PFEC_insn_fetch),%eax + jz compat_test_all_events +.Lcr4_pv32_done: sti 1: movq %rsp,%rdi movzbl UREGS_entry_vector(%rsp),%eax leaq exception_table(%rip),%rdx - GET_CURRENT(%rbx) PERFC_INCR(exceptions, %rax, %rbx) callq *(%rdx,%rax,8) testb $3,UREGS_cs(%rsp) @@ -636,6 +694,7 @@ movl $TRAP_nmi,4(%rsp) handle_ist_exception: SAVE_ALL CLAC + CR4_PV32_RESTORE testb $3,UREGS_cs(%rsp) jz 1f /* Interrupted guest context. Copy the context to stack bottom. 
*/ diff -Nru xen-4.6.0/xen/arch/x86/x86_64/mmconfig_64.c xen-4.6.5/xen/arch/x86/x86_64/mmconfig_64.c --- xen-4.6.0/xen/arch/x86/x86_64/mmconfig_64.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_64/mmconfig_64.c 2017-03-07 16:19:05.000000000 +0000 @@ -198,6 +198,16 @@ return 0; } +bool_t pci_ro_mmcfg_decode(unsigned long mfn, unsigned int *seg, + unsigned int *bdf) +{ + const unsigned long *ro_map; + + return pci_mmcfg_decode(mfn, seg, bdf) && + ((ro_map = pci_get_ro_map(*seg)) == NULL || + !test_bit(*bdf, ro_map)); +} + int __init pci_mmcfg_arch_init(void) { int i; diff -Nru xen-4.6.0/xen/arch/x86/x86_64/mmconfig-shared.c xen-4.6.5/xen/arch/x86/x86_64/mmconfig-shared.c --- xen-4.6.0/xen/arch/x86/x86_64/mmconfig-shared.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_64/mmconfig-shared.c 2017-03-07 16:19:05.000000000 +0000 @@ -182,10 +182,10 @@ int bus, i; static const u32 extcfg_regnum = 0x90; - static const u32 extcfg_enable_mask = 1<<31; - static const u32 extcfg_start_mask = 0xff<<16; + static const u32 extcfg_enable_mask = 1u << 31; + static const u32 extcfg_start_mask = 0xffu << 16; static const int extcfg_start_shift = 16; - static const u32 extcfg_size_mask = 0x3<<28; + static const u32 extcfg_size_mask = 3u << 28; static const int extcfg_size_shift = 28; static const int extcfg_sizebus[] = {0xff, 0x7f, 0x3f, 0x1f}; static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff}; diff -Nru xen-4.6.0/xen/arch/x86/x86_emulate/x86_emulate.c xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.c --- xen-4.6.0/xen/arch/x86/x86_emulate/x86_emulate.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.c 2017-03-07 16:19:05.000000000 +0000 @@ -298,7 +298,11 @@ #define copy_REX_VEX(ptr, rex, vex) do { \ if ( (vex).opcx != vex_none ) \ + { \ + if ( !mode_64bit() ) \ + vex.reg |= 8; \ ptr[0] = 0xc4, ptr[1] = (vex).raw[0], ptr[2] = (vex).raw[1]; \ + } \ else if ( mode_64bit() ) \ ptr[1] = rex | REX_PREFIX; \ } while (0) @@ -366,6 +370,9 @@ /* Control register flags. */ #define CR0_PE (1<<0) +#define CR0_MP (1<<1) +#define CR0_EM (1<<2) +#define CR0_TS (1<<3) #define CR4_TSD (1<<2) /* EFLAGS bit definitions. */ @@ -393,6 +400,7 @@ #define EXC_OF 4 #define EXC_BR 5 #define EXC_UD 6 +#define EXC_NM 7 #define EXC_TS 10 #define EXC_NP 11 #define EXC_SS 12 @@ -570,7 +578,6 @@ /* Fetch next part of the instruction being emulated. */ #define insn_fetch_bytes(_size) \ ({ unsigned long _x = 0, _eip = _regs.eip; \ - if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \ _regs.eip += (_size); /* real hardware doesn't truncate */ \ generate_exception_if((uint8_t)(_regs.eip - \ ctxt->regs->eip) > MAX_INST_LEN, \ @@ -589,7 +596,7 @@ }) #define truncate_ea(ea) truncate_word((ea), ad_bytes) -#define mode_64bit() (def_ad_bytes == 8) +#define mode_64bit() (ctxt->addr_size == 64) #define fail_if(p) \ do { \ @@ -643,14 +650,26 @@ #define jmp_rel(rel) \ do { \ - int _rel = (int)(rel); \ - _regs.eip += _rel; \ + unsigned long ip = _regs.eip + (int)(rel); \ if ( op_bytes == 2 ) \ - _regs.eip = (uint16_t)_regs.eip; \ + ip = (uint16_t)ip; \ else if ( !mode_64bit() ) \ - _regs.eip = (uint32_t)_regs.eip; \ + ip = (uint32_t)ip; \ + rc = ops->insn_fetch(x86_seg_cs, ip, NULL, 0, ctxt); \ + if ( rc ) goto done; \ + _regs.eip = ip; \ } while (0) +#define validate_far_branch(cs, ip) \ + generate_exception_if(in_longmode(ctxt, ops) && (cs)->attr.fields.l \ + ? 
!is_canonical_address(ip) \ + : (ip) > (cs)->limit, EXC_GP, 0) + +#define commit_far_branch(cs, ip) ({ \ + validate_far_branch(cs, ip); \ + ops->write_segment(x86_seg_cs, cs, ctxt); \ +}) + struct fpu_insn_ctxt { uint8_t insn_bytes; uint8_t exn_raised; @@ -663,10 +682,45 @@ regs->eip += fic->insn_bytes; } +static int _get_fpu( + enum x86_emulate_fpu_type type, + struct fpu_insn_ctxt *fic, + struct x86_emulate_ctxt *ctxt, + const struct x86_emulate_ops *ops) +{ + int rc; + + fic->exn_raised = 0; + + fail_if(!ops->get_fpu); + rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt); + + if ( rc == X86EMUL_OKAY ) + { + unsigned long cr0; + + fail_if(!ops->read_cr); + rc = ops->read_cr(0, &cr0, ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + if ( cr0 & CR0_EM ) + { + generate_exception_if(type == X86EMUL_FPU_fpu, EXC_NM, -1); + generate_exception_if(type == X86EMUL_FPU_mmx, EXC_UD, -1); + generate_exception_if(type == X86EMUL_FPU_xmm, EXC_UD, -1); + } + generate_exception_if((cr0 & CR0_TS) && + (type != X86EMUL_FPU_wait || (cr0 & CR0_MP)), + EXC_NM, -1); + } + + done: + return rc; +} + #define get_fpu(_type, _fic) \ -do{ (_fic)->exn_raised = 0; \ - fail_if(ops->get_fpu == NULL); \ - rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \ +do { \ + rc = _get_fpu(_type, _fic, ctxt, ops); \ if ( rc ) goto done; \ } while (0) #define _put_fpu() \ @@ -1099,52 +1153,75 @@ realmode_load_seg( enum x86_segment seg, uint16_t sel, + struct segment_register *sreg, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { - struct segment_register reg; - int rc; - - if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 ) - return rc; + int rc = ops->read_segment(seg, sreg, ctxt); - reg.sel = sel; - reg.base = (uint32_t)sel << 4; + if ( !rc ) + { + sreg->sel = sel; + sreg->base = (uint32_t)sel << 4; + } - return ops->write_segment(seg, &reg, ctxt); + return rc; } static int protmode_load_seg( enum x86_segment seg, uint16_t sel, bool_t is_ret, + struct segment_register *sreg, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { - struct segment_register desctab, ss, segr; + struct segment_register desctab; struct { uint32_t a, b; } desc; - uint8_t dpl, rpl, cpl; - uint32_t new_desc_b, a_flag = 0x100; + uint8_t dpl, rpl; + int cpl = get_cpl(ctxt, ops); + uint32_t a_flag = 0x100; int rc, fault_type = EXC_GP; + if ( cpl < 0 ) + return X86EMUL_UNHANDLEABLE; + /* NULL selector? */ if ( (sel & 0xfffc) == 0 ) { - if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) ) + switch ( seg ) + { + case x86_seg_ss: + if ( mode_64bit() && (cpl != 3) && (cpl == sel) ) + default: + break; + /* fall through */ + case x86_seg_cs: + case x86_seg_tr: goto raise_exn; - memset(&segr, 0, sizeof(segr)); - return ops->write_segment(seg, &segr, ctxt); + } + memset(sreg, 0, sizeof(*sreg)); + sreg->sel = sel; + + /* Since CPL == SS.DPL, we need to put back DPL. */ + if ( seg == x86_seg_ss ) + sreg->attr.fields.dpl = sel; + + return X86EMUL_OKAY; } /* System segment descriptors must reside in the GDT. */ if ( !is_x86_user_segment(seg) && (sel & 4) ) goto raise_exn; - if ( (rc = ops->read_segment(x86_seg_ss, &ss, ctxt)) || - (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, + if ( (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab, ctxt)) ) return rc; + /* Segment not valid for use (cooked meaning of .p)? */ + if ( !desctab.attr.fields.p ) + goto raise_exn; + /* Check against descriptor table limit. 
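The validate_far_branch()/commit_far_branch() pair introduced above encodes the architectural rule for far transfers: in a 64-bit code segment the target must be canonical, otherwise it must lie within the CS limit, and the new CS is only committed once the target checks out. A hedged sketch; struct seg and far_branch_ok() are simplified stand-ins for Xen's struct segment_register and the macros:

    #include <stdbool.h>
    #include <stdint.h>

    struct seg {
        uint64_t limit;
        bool     l;            /* 64-bit code segment (L bit) */
    };

    static bool is_canonical(uint64_t addr)
    {
        /* Bits 63:47 must be a sign-extension of bit 47. */
        return ((int64_t)addr >> 47) == ((int64_t)addr >> 63);
    }

    static bool far_branch_ok(const struct seg *cs, uint64_t ip,
                              bool long_mode)
    {
        return (long_mode && cs->l) ? is_canonical(ip)
                                    : ip <= cs->limit;
    }
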
*/ if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto raise_exn; @@ -1153,13 +1230,6 @@ &desc, sizeof(desc), ctxt)) ) return rc; - /* Segment present in memory? */ - if ( !(desc.b & (1u<<15)) ) - { - fault_type = EXC_NP; - goto raise_exn; - } - if ( !is_x86_user_segment(seg) ) { /* System segments must have S flag == 0. */ @@ -1175,7 +1245,6 @@ dpl = (desc.b >> 13) & 3; rpl = sel & 3; - cpl = ss.attr.fields.dpl; switch ( seg ) { @@ -1195,7 +1264,11 @@ /* Non-conforming segment: check RPL and DPL against CPL. */ : rpl > cpl || dpl != cpl ) goto raise_exn; - /* 64-bit code segments (L bit set) must have D bit clear. */ + /* + * 64-bit code segments (L bit set) must have D bit clear. + * Experimentally in long mode, the L and D bits are checked before + * the Present bit. + */ if ( in_longmode(ctxt, ops) && (desc.b & (1 << 21)) && (desc.b & (1 << 22)) ) goto raise_exn; @@ -1212,7 +1285,8 @@ /* LDT system segment? */ if ( (desc.b & (15u<<8)) != (2u<<8) ) goto raise_exn; - goto skip_accessed_flag; + a_flag = 0; + break; case x86_seg_tr: /* Available TSS system segment? */ if ( (desc.b & (15u<<8)) != (9u<<8) ) @@ -1230,28 +1304,36 @@ break; } + /* Segment present in memory? */ + if ( !(desc.b & (1 << 15)) ) + { + fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS; + goto raise_exn; + } + /* Ensure Accessed flag is set. */ - new_desc_b = desc.b | a_flag; - if ( !(desc.b & a_flag) && - ((rc = ops->cmpxchg( - x86_seg_none, desctab.base + (sel & 0xfff8) + 4, - &desc.b, &new_desc_b, 4, ctxt)) != 0) ) - return rc; + if ( a_flag && !(desc.b & a_flag) ) + { + uint32_t new_desc_b = desc.b | a_flag; - /* Force the Accessed flag in our local copy. */ - desc.b |= a_flag; + if ( (rc = ops->cmpxchg(x86_seg_none, desctab.base + (sel & 0xfff8) + 4, + &desc.b, &new_desc_b, 4, ctxt)) != 0 ) + return rc; + + /* Force the Accessed flag in our local copy. 
*/ + desc.b = new_desc_b; + } - skip_accessed_flag: - segr.base = (((desc.b << 0) & 0xff000000u) | - ((desc.b << 16) & 0x00ff0000u) | - ((desc.a >> 16) & 0x0000ffffu)); - segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) | - ((desc.b >> 12) & 0x0f00u)); - segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); - if ( segr.attr.fields.g ) - segr.limit = (segr.limit << 12) | 0xfffu; - segr.sel = sel; - return ops->write_segment(seg, &segr, ctxt); + sreg->base = (((desc.b << 0) & 0xff000000u) | + ((desc.b << 16) & 0x00ff0000u) | + ((desc.a >> 16) & 0x0000ffffu)); + sreg->attr.bytes = (((desc.b >> 8) & 0x00ffu) | + ((desc.b >> 12) & 0x0f00u)); + sreg->limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); + if ( sreg->attr.fields.g ) + sreg->limit = (sreg->limit << 12) | 0xfffu; + sreg->sel = sel; + return X86EMUL_OKAY; raise_exn: if ( ops->inject_hw_exception == NULL ) @@ -1265,17 +1347,29 @@ load_seg( enum x86_segment seg, uint16_t sel, bool_t is_ret, + struct segment_register *sreg, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { + struct segment_register reg; + int rc; + if ( (ops->read_segment == NULL) || (ops->write_segment == NULL) ) return X86EMUL_UNHANDLEABLE; + if ( !sreg ) + sreg = &reg; + if ( in_protmode(ctxt, ops) ) - return protmode_load_seg(seg, sel, is_ret, ctxt, ops); + rc = protmode_load_seg(seg, sel, is_ret, sreg, ctxt, ops); + else + rc = realmode_load_seg(seg, sel, sreg, ctxt, ops); - return realmode_load_seg(seg, sel, ctxt, ops); + if ( !rc && sreg == &reg ) + rc = ops->write_segment(seg, sreg, ctxt); + + return rc; } void * @@ -1318,21 +1412,28 @@ return p; } -#define decode_segment_failed x86_seg_tr -static enum x86_segment -decode_segment(uint8_t modrm_reg) +static bool_t is_aligned(enum x86_segment seg, unsigned long offs, + unsigned int size, struct x86_emulate_ctxt *ctxt, + const struct x86_emulate_ops *ops) { - switch ( modrm_reg ) + struct segment_register reg; + + /* Expecting powers of two only. */ + ASSERT(!(size & (size - 1))); + + if ( mode_64bit() && seg < x86_seg_fs ) + memset(&reg, 0, sizeof(reg)); + else { - case 0: return x86_seg_es; - case 1: return x86_seg_cs; - case 2: return x86_seg_ss; - case 3: return x86_seg_ds; - case 4: return x86_seg_fs; - case 5: return x86_seg_gs; - default: break; + /* No alignment checking when we have no way to read segment data. */ + if ( !ops->read_segment ) + return 1; + + if ( ops->read_segment(seg, &reg, ctxt) != X86EMUL_OKAY ) + return 0; } - return decode_segment_failed; + + return !((reg.base + offs) & (size - 1)); } /* Inject a software interrupt/exception, emulating if needed. */ @@ -1378,10 +1479,16 @@ { if ( !in_realmode(ctxt, ops) ) { - unsigned int idte_size = (ctxt->addr_size == 64) ? 16 : 8; - unsigned int idte_offset = vector * idte_size; struct segment_register idtr; uint32_t idte_ctl; + int lm = in_longmode(ctxt, ops); + + if ( lm < 0 ) + return X86EMUL_UNHANDLEABLE; + + idte_size = lm ? 16 : 8; + idte_offset = vector * idte_size; /* icebp sets the External Event bit despite being an instruction. */ error_code = (vector << 3) | ECODE_IDT | @@ -1409,8 +1516,9 @@ * Should strictly speaking read all 8/16 bytes of an entry, * but we currently only care about the dpl and present bits. */ - ops->read(x86_seg_none, idtr.base + idte_offset + 4, - &idte_ctl, sizeof(idte_ctl), ctxt); + if ( (rc = ops->read(x86_seg_none, idtr.base + idte_offset + 4, + &idte_ctl, sizeof(idte_ctl), ctxt)) ) + goto done; /* Is this entry present? 
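The tail of protmode_load_seg() above rebuilds the cached segment register from the two raw descriptor words. The same unpacking in isolation (unpack_segment() is a hypothetical helper; desc.a is the low dword, desc.b the high one, as in the patch; attribute bits are omitted for brevity):

    #include <stdint.h>

    struct desc { uint32_t a, b; };

    static void unpack_segment(const struct desc *d,
                               uint64_t *base, uint32_t *limit)
    {
        /* Base 31:24 and 23:16 sit in the high word, 15:0 in the
         * low word's upper half. */
        *base  = (d->b & 0xff000000u)
               | ((d->b << 16) & 0x00ff0000u)
               | ((d->a >> 16) & 0x0000ffffu);

        /* 20-bit limit, split across both words. */
        *limit = (d->b & 0x000f0000u) | (d->a & 0x0000ffffu);

        /* G bit (bit 55 of the descriptor): limit is in 4KiB units. */
        if ( d->b & (1u << 23) )
            *limit = (*limit << 12) | 0xfff;
    }
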
*/ if ( !(idte_ctl & (1u << 15)) ) @@ -1444,6 +1552,16 @@ return ops->inject_hw_exception(fault_type, error_code, ctxt); } +int x86emul_unhandleable_rw( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + return X86EMUL_UNHANDLEABLE; +} + int x86_emulate( struct x86_emulate_ctxt *ctxt, @@ -1457,6 +1575,7 @@ union vex vex = {}; unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; bool_t lock_prefix = 0; + bool_t tf = !!(ctxt->regs->eflags & EFLG_TF); int override_seg = -1, rc = X86EMUL_OKAY; struct operand src = { .reg = REG_POISON }; struct operand dst = { .reg = REG_POISON }; @@ -1570,16 +1689,21 @@ default: BUG(); case 2: - if ( in_realmode(ctxt, ops) || (_regs.eflags & EFLG_VM) ) + if ( _regs.eflags & EFLG_VM ) break; /* fall through */ case 4: - if ( modrm_mod != 3 ) + if ( modrm_mod != 3 || in_realmode(ctxt, ops) ) break; /* fall through */ case 8: /* VEX */ generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); + /* + * With operand size override disallowed (see above), op_bytes + * should not have changed from its default. + */ + ASSERT(op_bytes == def_op_bytes); vex.raw[0] = modrm; if ( b & 1 ) @@ -1605,8 +1729,14 @@ op_bytes = 8; } } + else + { + /* Operand size fixed at 4 (no override via W bit). */ + op_bytes = 4; + vex.b = 1; + } } - if ( mode_64bit() && !vex.r ) + if ( !vex.r ) rex_prefix |= REX_R; fail_if(vex.opcx != vex_0f); @@ -1903,6 +2033,12 @@ else { /* + * Instructions such as bt can reference an arbitrary offset from + * their memory operand, but the instruction doing the actual + * emulation needs the appropriate op_bytes read from memory. + * Adjust both the source register and memory operand to make an + * equivalent instruction. + * * EA += BitOffset DIV op_bytes*8 * BitOffset = BitOffset MOD op_bytes*8 * DIV truncates towards negative infinity. @@ -1914,14 +2050,15 @@ src.val = (int32_t)src.val; if ( (long)src.val < 0 ) { - unsigned long byte_offset; - byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1)); + unsigned long byte_offset = + op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L)); + ea.mem.off -= byte_offset; src.val = (byte_offset << 3) + src.val; } else { - ea.mem.off += (src.val >> 3) & ~(op_bytes - 1); + ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L); src.val &= (op_bytes << 3) - 1; } } @@ -1956,6 +2093,8 @@ switch ( b ) { + struct segment_register cs; + case 0x00 ... 0x05: add: /* add */ emulate_2op_SrcV("add", src, dst, _regs.eflags); break; @@ -1997,13 +2136,8 @@ fail_if(ops->read_segment == NULL); if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 ) return rc; - /* 64-bit mode: PUSH defaults to a 64-bit operand. */ - if ( mode_64bit() && (op_bytes == 4) ) - op_bytes = 8; - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - &reg.sel, op_bytes, ctxt)) != 0 ) - goto done; - break; + src.val = reg.sel; + goto push; } case 0x07: /* pop %%es */ @@ -2017,7 +2151,7 @@ if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &dst.val, op_bytes, ctxt, ops)) != 0 ) goto done; - if ( (rc = load_seg(src.val, dst.val, 0, ctxt, ops)) != 0 ) + if ( (rc = load_seg(src.val, dst.val, 0, NULL, ctxt, ops)) != 0 ) return rc; break; @@ -2356,8 +2490,8 @@ case 0x8c: /* mov Sreg,r/m */ { struct segment_register reg; - enum x86_segment seg = decode_segment(modrm_reg); - generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); + enum x86_segment seg = modrm_reg & 7; /* REX.R is ignored. 
*/ + generate_exception_if(!is_x86_user_segment(seg), EXC_UD, -1); fail_if(ops->read_segment == NULL); if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 ) goto done; @@ -2368,10 +2502,10 @@ } case 0x8e: /* mov r/m,Sreg */ { - enum x86_segment seg = decode_segment(modrm_reg); - generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); - generate_exception_if(seg == x86_seg_cs, EXC_UD, -1); - if ( (rc = load_seg(seg, src.val, 0, ctxt, ops)) != 0 ) + enum x86_segment seg = modrm_reg & 7; /* REX.R is ignored. */ + generate_exception_if(!is_x86_user_segment(seg) || + seg == x86_seg_cs, EXC_UD, -1); + if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 ) goto done; if ( seg == x86_seg_ss ) ctxt->retire.flags.mov_ss = 1; @@ -2446,30 +2580,47 @@ sel = insn_fetch_type(uint16_t); if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) || + (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) || + (validate_far_branch(&cs, eip), + src.val = reg.sel, + rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), + &src.val, op_bytes, ctxt)) || (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - &reg.sel, op_bytes, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - &_regs.eip, op_bytes, ctxt)) ) + &_regs.eip, op_bytes, ctxt)) || + (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ) goto done; - if ( (rc = load_seg(x86_seg_cs, sel, 0, ctxt, ops)) != 0 ) - goto done; _regs.eip = eip; break; } case 0x9b: /* wait/fwait */ - emulate_fpu_insn("fwait"); + { + struct fpu_insn_ctxt fic = { .insn_bytes = 1 }; + + get_fpu(X86EMUL_FPU_wait, &fic); + asm volatile ( "fwait" ::: "memory" ); + put_fpu(&fic); break; + } case 0x9c: /* pushf */ - src.val = _regs.eflags; + generate_exception_if((_regs.eflags & EFLG_VM) && + MASK_EXTR(_regs.eflags, EFLG_IOPL) != 3, + EXC_GP, 0); + src.val = _regs.eflags & ~(EFLG_VM | EFLG_RF); goto push; case 0x9d: /* popf */ { uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; + if ( !mode_ring0() ) + { + generate_exception_if((_regs.eflags & EFLG_VM) && + MASK_EXTR(_regs.eflags, EFLG_IOPL) != 3, + EXC_GP, 0); mask |= EFLG_IOPL; + } if ( !mode_iopl() ) mask |= EFLG_IF; /* 64-bit mode: POP defaults to a 64-bit operand. */ @@ -2671,7 +2822,8 @@ int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0; op_bytes = ((op_bytes == 4) && mode_64bit()) ? 
8 : op_bytes; if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + offset), - &dst.val, op_bytes, ctxt, ops)) != 0 ) + &dst.val, op_bytes, ctxt, ops)) != 0 || + (rc = ops->insn_fetch(x86_seg_cs, dst.val, NULL, 0, ctxt)) ) goto done; _regs.eip = dst.val; break; @@ -2686,7 +2838,7 @@ if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes, &sel, 2, ctxt, ops)) != 0 ) goto done; - if ( (rc = load_seg(dst.val, sel, 0, ctxt, ops)) != 0 ) + if ( (rc = load_seg(dst.val, sel, 0, NULL, ctxt, ops)) != 0 ) goto done; dst.val = src.val; break; @@ -2760,7 +2912,8 @@ &dst.val, op_bytes, ctxt, ops)) || (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + offset), &src.val, op_bytes, ctxt, ops)) || - (rc = load_seg(x86_seg_cs, src.val, 1, ctxt, ops)) ) + (rc = load_seg(x86_seg_cs, src.val, 1, &cs, ctxt, ops)) || + (rc = commit_far_branch(&cs, dst.val)) ) goto done; _regs.eip = dst.val; break; @@ -2789,7 +2942,7 @@ goto swint; case 0xcf: /* iret */ { - unsigned long cs, eip, eflags; + unsigned long sel, eip, eflags; uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; if ( !mode_ring0() ) mask |= EFLG_IOPL; @@ -2799,7 +2952,7 @@ if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &eip, op_bytes, ctxt, ops)) || (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), - &cs, op_bytes, ctxt, ops)) || + &sel, op_bytes, ctxt, ops)) || (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &eflags, op_bytes, ctxt, ops)) ) goto done; @@ -2809,7 +2962,8 @@ _regs.eflags &= mask; _regs.eflags |= (uint32_t)(eflags & ~mask) | 0x02; _regs.eip = eip; - if ( (rc = load_seg(x86_seg_cs, cs, 1, ctxt, ops)) != 0 ) + if ( (rc = load_seg(x86_seg_cs, sel, 1, &cs, ctxt, ops)) || + (rc = commit_far_branch(&cs, eip)) ) goto done; break; } @@ -3439,7 +3593,8 @@ generate_exception_if(mode_64bit(), EXC_UD, -1); eip = insn_fetch_bytes(op_bytes); sel = insn_fetch_type(uint16_t); - if ( (rc = load_seg(x86_seg_cs, sel, 0, ctxt, ops)) != 0 ) + if ( (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) || + (rc = commit_far_branch(&cs, eip)) ) goto done; _regs.eip = eip; break; @@ -3709,10 +3864,14 @@ break; case 2: /* call (near) */ dst.val = _regs.eip; + if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) ) + goto done; _regs.eip = src.val; src.val = dst.val; goto push; case 4: /* jmp (near) */ + if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) ) + goto done; _regs.eip = src.val; dst.type = OP_NONE; break; @@ -3731,14 +3890,18 @@ struct segment_register reg; fail_if(ops->read_segment == NULL); if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) || + (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) || + (validate_far_branch(&cs, src.val), + dst.val = reg.sel, + rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), + &dst.val, op_bytes, ctxt)) || (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - &reg.sel, op_bytes, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - &_regs.eip, op_bytes, ctxt)) ) + &_regs.eip, op_bytes, ctxt)) || + (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ) goto done; } - - if ( (rc = load_seg(x86_seg_cs, sel, 0, ctxt, ops)) != 0 ) + else if ( (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) || + (rc = commit_far_branch(&cs, src.val)) ) goto done; _regs.eip = src.val; @@ -3786,13 +3949,17 @@ } no_writeback: - /* Inject #DB if single-step tracing was enabled at instruction start. */ - if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) && - (ops->inject_hw_exception != NULL) ) + /* Should a singlestep #DB be raised? 
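Several hunks above insert a zero-length ops->insn_fetch() at the would-be target before committing a near RET/CALL/JMP, so a bad destination faults at the branch instead of leaving a bogus %rip; the zero-length fetch reuses the segment, limit and canonicality checks the fetch path already performs. The pattern, using the emulator's real hook signature (commit_near_branch() itself is an illustrative name, and the declarations from x86_emulate.h are assumed):

    static int commit_near_branch(unsigned long new_ip,
                                  struct x86_emulate_ctxt *ctxt,
                                  const struct x86_emulate_ops *ops,
                                  unsigned long *ip)
    {
        int rc = ops->insn_fetch(x86_seg_cs, new_ip, NULL, 0, ctxt);

        if ( rc == X86EMUL_OKAY )
            *ip = new_ip;   /* commit only once the target is known good */
        return rc;
    }
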
*/ + if ( tf && (rc == X86EMUL_OKAY) && (ops->inject_hw_exception != NULL) ) rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION; /* Commit shadow register state. */ _regs.eflags &= ~EFLG_RF; + + /* Zero the upper 32 bits of %rip if not in long mode. */ + if ( def_ad_bytes < sizeof(_regs.eip) ) + _regs.eip = (uint32_t)_regs.eip; + *ctxt->regs = _regs; done: @@ -3808,7 +3975,7 @@ generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1); generate_exception_if(!mode_ring0(), EXC_GP, 0); if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr, - src.val, 0, ctxt, ops)) != 0 ) + src.val, 0, NULL, ctxt, ops)) != 0 ) goto done; break; @@ -3838,7 +4005,7 @@ generate_exception_if(lock_prefix | rep_prefix() | (vex.pfx == vex_66), EXC_UD, -1); fail_if(ops->vmfunc == NULL); - if ( (rc = ops->vmfunc(ctxt) != X86EMUL_OKAY) ) + if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY ) goto done; goto no_writeback; } @@ -3853,12 +4020,17 @@ x86_seg_idtr : x86_seg_gdtr, &reg, ctxt)) ) goto done; - if ( op_bytes == 2 ) + if ( mode_64bit() ) + op_bytes = 8; + else if ( op_bytes == 2 ) + { reg.base &= 0xffffff; - if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, - &reg.limit, 2, ctxt)) || - (rc = ops->write(ea.mem.seg, ea.mem.off+2, - &reg.base, mode_64bit() ? 8 : 4, ctxt)) ) + op_bytes = 4; + } + if ( (rc = ops->write(ea.mem.seg, ea.mem.off, &reg.limit, + 2, ctxt)) != X86EMUL_OKAY || + (rc = ops->write(ea.mem.seg, ea.mem.off + 2, &reg.base, + op_bytes, ctxt)) != X86EMUL_OKAY ) goto done; break; case 2: /* lgdt */ @@ -3872,9 +4044,10 @@ (rc = read_ulong(ea.mem.seg, ea.mem.off+2, &base, mode_64bit() ? 8 : 4, ctxt, ops)) ) goto done; + generate_exception_if(!is_canonical_address(base), EXC_GP, 0); reg.base = base; reg.limit = limit; - if ( op_bytes == 2 ) + if ( !mode_64bit() && op_bytes == 2 ) reg.base &= 0xffffff; if ( (rc = ops->write_segment((modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr, @@ -3976,6 +4149,23 @@ (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) ) goto done; + /* + * SYSCALL (unlike most instructions) evaluates its singlestep action + * based on the resulting EFLG_TF, not the starting EFLG_TF. + * + * As the #DB is raised after the CPL change and before the OS can + * switch stack, it is a large risk for privilege escalation. + * + * 64bit kernels should mask EFLG_TF in MSR_FMASK to avoid any + * vulnerability. Running the #DB handler on an IST stack is also a + * mitigation. + * + * 32bit kernels have no ability to mask EFLG_TF at all. Their only + * mitigation is to use a task gate for handling #DB (or to not + * enable EFER.SCE to start with). + */ + tf = !!(_regs.eflags & EFLG_TF); + break; } @@ -4075,6 +4265,8 @@ if ( !rc && (b & 1) && (ea.type == OP_MEM) ) rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, ea.bytes, ctxt); + if ( rc ) + goto done; dst.type = OP_NONE; break; } @@ -4157,9 +4349,8 @@ } case 0x40 ... 0x4f: /* cmovcc */ - dst.val = src.val; - if ( !test_cc(b, _regs.eflags) ) - dst.type = OP_NONE; + if ( test_cc(b, _regs.eflags) ) + dst.val = src.val; break; case 0x34: /* sysenter */ { @@ -4225,6 +4416,9 @@ goto done; generate_exception_if(!(msr_content & 0xfffc), EXC_GP, 0); + generate_exception_if(user64 && (!is_canonical_address(_regs.edx) || + !is_canonical_address(_regs.ecx)), + EXC_GP, 0); cs.sel = (msr_content | 3) + /* SELECTOR_RPL_MASK */ (user64 ? 
32 : 16); @@ -4326,6 +4520,8 @@ if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) ) rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, ea.bytes, ctxt); + if ( rc ) + goto done; dst.type = OP_NONE; break; } @@ -4390,6 +4586,7 @@ break; case 0xa3: bt: /* bt */ + generate_exception_if(lock_prefix, EXC_UD, 0); emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); dst.type = OP_NONE; break; @@ -4591,7 +4788,7 @@ case 0xc3: /* movnti */ /* Ignore the non-temporal hint for now. */ vcpu_must_have_sse2(); - generate_exception_if(dst.bytes <= 2, EXC_UD, -1); + generate_exception_if(vex.pfx, EXC_UD, -1); dst.val = src.val; break; @@ -4601,8 +4798,15 @@ generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); if ( op_bytes == 8 ) + { vcpu_must_have_cx16(); - op_bytes *= 2; + generate_exception_if(!is_aligned(ea.mem.seg, ea.mem.off, 16, + ctxt, ops), + EXC_GP, 0); + op_bytes = 16; + } + else + op_bytes = 8; /* Get actual old value. */ if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes, @@ -4667,3 +4871,14 @@ put_stub(stub); return X86EMUL_UNHANDLEABLE; } + +static inline void build_assertions(void) +{ + /* Check the values against SReg3 encoding in opcode/ModRM bytes. */ + BUILD_BUG_ON(x86_seg_es != 0); + BUILD_BUG_ON(x86_seg_cs != 1); + BUILD_BUG_ON(x86_seg_ss != 2); + BUILD_BUG_ON(x86_seg_ds != 3); + BUILD_BUG_ON(x86_seg_fs != 4); + BUILD_BUG_ON(x86_seg_gs != 5); +} diff -Nru xen-4.6.0/xen/arch/x86/x86_emulate/x86_emulate.h xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.h --- xen-4.6.0/xen/arch/x86/x86_emulate/x86_emulate.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/x86_emulate/x86_emulate.h 2017-03-07 16:19:05.000000000 +0000 @@ -29,11 +29,11 @@ /* Comprehensive enumeration of x86 segment registers. */ enum x86_segment { - /* General purpose. */ + /* General purpose. Matches the SReg3 encoding in opcode/ModRM bytes. */ + x86_seg_es, x86_seg_cs, x86_seg_ss, x86_seg_ds, - x86_seg_es, x86_seg_fs, x86_seg_gs, /* System. */ @@ -115,6 +115,7 @@ /* FPU sub-types which may be requested via ->get_fpu(). */ enum x86_emulate_fpu_type { X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */ + X86EMUL_FPU_wait, /* WAIT/FWAIT instruction */ X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */ X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */ X86EMUL_FPU_ymm /* AVX/XOP instruction set (%ymm0-%ymm7/15) */ @@ -430,6 +431,9 @@ } flags; uint8_t byte; } retire; + + /* Caller data that can be used by x86_emulate_ops' routines. */ + void *data; }; struct x86_emulate_stub { @@ -463,4 +467,13 @@ decode_register( uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs); +/* Unhandleable read, write or instruction fetch */ +int +x86emul_unhandleable_rw( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); + #endif /* __X86_EMULATE_H__ */ diff -Nru xen-4.6.0/xen/arch/x86/xstate.c xen-4.6.5/xen/arch/x86/xstate.c --- xen-4.6.0/xen/arch/x86/xstate.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/arch/x86/xstate.c 2017-03-07 16:19:05.000000000 +0000 @@ -70,78 +70,83 @@ struct xsave_struct *ptr = v->arch.xsave_area; uint32_t hmask = mask >> 32; uint32_t lmask = mask; - int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 
8 : 0) : -1; + unsigned int fip_width = v->domain->arch.x87_fip_width; - if ( word_size <= 0 || !is_pv_32bit_vcpu(v) ) + if ( fip_width == 8 || !(mask & XSTATE_FP) ) { - typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel; - typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel; + if ( cpu_has_xsaveopt ) + asm volatile ( ".byte 0x48,0x0f,0xae,0x37" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else + asm volatile ( ".byte 0x48,0x0f,0xae,0x27" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + } + else if ( fip_width == 4 ) + { + if ( cpu_has_xsaveopt ) + asm volatile ( ".byte 0x0f,0xae,0x37" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else + asm volatile ( ".byte 0x0f,0xae,0x27" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + } + else + { + /* + * FIP/FDP may not be written in some cases (e.g., if XSAVEOPT/XSAVES + * is used, or on AMD CPUs if an exception isn't pending). + * + * To tell if the hardware writes these fields, poison the FIP field. + * The poison is + * a) non-canonical + * b) non-zero for the reserved part of a 32-bit FCS:FIP + * c) random with a vanishingly small probability to match a value the + * hardware may write (1e-19) even if it did not canonicalize the + * 64-bit FIP or zero-extend the 16-bit FCS. + */ + uint64_t orig_fip = ptr->fpu_sse.fip.addr; + const uint64_t bad_fip = 0x6a3f5c4b13a533f6; + + ptr->fpu_sse.fip.addr = bad_fip; if ( cpu_has_xsaveopt ) - { - /* - * xsaveopt may not write the FPU portion even when the respective - * mask bit is set. For the check further down to work we hence - * need to put the save image back into the state that it was in - * right after the previous xsaveopt. - */ - if ( word_size > 0 && - (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 || - ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) ) - { - ptr->fpu_sse.fip.sel = 0; - ptr->fpu_sse.fdp.sel = 0; - } asm volatile ( ".byte 0x48,0x0f,0xae,0x37" : "=m" (*ptr) : "a" (lmask), "d" (hmask), "D" (ptr) ); - } else asm volatile ( ".byte 0x48,0x0f,0xae,0x27" : "=m" (*ptr) : "a" (lmask), "d" (hmask), "D" (ptr) ); - if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) || - /* - * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception - * is pending. - */ - (!(ptr->fpu_sse.fsw & 0x0080) && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ) + /* FIP/FDP not updated? Restore the old FIP value. */ + if ( ptr->fpu_sse.fip.addr == bad_fip ) { - if ( cpu_has_xsaveopt && word_size > 0 ) - { - ptr->fpu_sse.fip.sel = fcs; - ptr->fpu_sse.fdp.sel = fds; - } + ptr->fpu_sse.fip.addr = orig_fip; return; } - if ( word_size > 0 && - !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) ) + /* + * If the FIP/FDP[63:32] are both zero, it is safe to use the + * 32-bit restore to also restore the selectors. 
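
The poison-and-check idiom introduced above generalizes to any situation where hardware may or may not have written a field. A minimal self-contained sketch of the same technique (all names hypothetical, not Xen APIs):

    #include <stdint.h>
    #include <stdbool.h>

    /*
     * Plant a sentinel the hardware could never legitimately produce, run
     * the save operation, then test whether the sentinel survived.  Returns
     * true if save() overwrote *field; on false, the old value is restored.
     */
    static bool field_was_written(uint64_t *field, void (*save)(void))
    {
        const uint64_t poison = 0x6a3f5c4b13a533f6ULL; /* non-canonical */
        uint64_t orig = *field;

        *field = poison;
        save();

        if ( *field == poison )
        {
            *field = orig;   /* hardware did not touch it */
            return false;
        }
        return true;         /* hardware wrote a fresh value */
    }
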
+ */ + if ( !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) ) { struct ix87_env fpu_env; asm volatile ( "fnstenv %0" : "=m" (fpu_env) ); ptr->fpu_sse.fip.sel = fpu_env.fcs; ptr->fpu_sse.fdp.sel = fpu_env.fds; - word_size = 4; + fip_width = 4; } - } - else - { - if ( cpu_has_xsaveopt ) - asm volatile ( ".byte 0x0f,0xae,0x37" - : "=m" (*ptr) - : "a" (lmask), "d" (hmask), "D" (ptr) ); else - asm volatile ( ".byte 0x0f,0xae,0x27" - : "=m" (*ptr) - : "a" (lmask), "d" (hmask), "D" (ptr) ); - word_size = 4; + fip_width = 8; } - if ( word_size >= 0 ) - ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size; + if ( mask & XSTATE_FP ) + ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width; } void xrstor(struct vcpu *v, uint64_t mask) @@ -158,7 +163,7 @@ * data block as a safe address because it should be in L1. */ if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) && - !(ptr->fpu_sse.fsw & 0x0080) && + !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) && boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) asm volatile ( "fnclex\n\t" /* clear exceptions */ "ffree %%st(7)\n\t" /* clear stack tag */ diff -Nru xen-4.6.0/xen/common/compat/memory.c xen-4.6.5/xen/common/compat/memory.c --- xen-4.6.0/xen/common/compat/memory.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/compat/memory.c 2017-03-07 16:19:05.000000000 +0000 @@ -312,6 +312,11 @@ break; } + case XENMEM_access_op: + return mem_access_memop(cmd, + guest_handle_cast(compat, + xen_mem_access_op_t)); + case XENMEM_get_vnumainfo: { enum XLAT_vnuma_topology_info_vdistance vdistance = @@ -487,10 +492,6 @@ break; } - case XENMEM_access_op: - rc = mem_access_memop(cmd, guest_handle_cast(compat, xen_mem_access_op_t)); - break; - case XENMEM_add_to_physmap_batch: start_extent = end_extent; break; diff -Nru xen-4.6.0/xen/common/device_tree.c xen-4.6.5/xen/common/device_tree.c --- xen-4.6.0/xen/common/device_tree.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/device_tree.c 2017-03-07 16:19:05.000000000 +0000 @@ -849,7 +849,8 @@ ranges = dt_get_property(dev, "ranges", &rlen); if ( ranges == NULL ) { - printk(XENLOG_ERR "DT: no ranges; cannot enumerate\n"); + printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n", + dev->full_name); return -EINVAL; } if ( rlen == 0 ) /* Nothing to do */ diff -Nru xen-4.6.0/xen/common/domain.c xen-4.6.5/xen/common/domain.c --- xen-4.6.0/xen/common/domain.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/domain.c 2017-03-07 16:19:05.000000000 +0000 @@ -833,6 +833,7 @@ xsm_free_security_domain(d); free_cpumask_var(d->domain_dirty_cpumask); + xfree(d->vcpu); free_domain_struct(d); send_global_virq(VIRQ_DOM_EXC); diff -Nru xen-4.6.0/xen/common/domctl.c xen-4.6.5/xen/common/domctl.c --- xen-4.6.0/xen/common/domctl.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/domctl.c 2017-03-07 16:19:05.000000000 +0000 @@ -216,54 +216,6 @@ memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t)); } -static unsigned int default_vcpu0_location(cpumask_t *online) -{ - struct domain *d; - struct vcpu *v; - unsigned int i, cpu, nr_cpus, *cnt; - cpumask_t cpu_exclude_map; - - /* Do an initial CPU placement. Pick the least-populated CPU. 
*/ - nr_cpus = cpumask_last(&cpu_online_map) + 1; - cnt = xzalloc_array(unsigned int, nr_cpus); - if ( cnt ) - { - rcu_read_lock(&domlist_read_lock); - for_each_domain ( d ) - for_each_vcpu ( d, v ) - if ( !test_bit(_VPF_down, &v->pause_flags) - && ((cpu = v->processor) < nr_cpus) ) - cnt[cpu]++; - rcu_read_unlock(&domlist_read_lock); - } - - /* - * If we're on a HT system, we only auto-allocate to a non-primary HT. We - * favour high numbered CPUs in the event of a tie. - */ - cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0)); - cpu = cpumask_first(&cpu_exclude_map); - i = cpumask_next(cpu, &cpu_exclude_map); - if ( i < nr_cpu_ids ) - cpu = i; - for_each_cpu(i, online) - { - if ( cpumask_test_cpu(i, &cpu_exclude_map) ) - continue; - if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) && - (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) ) - continue; - cpumask_or(&cpu_exclude_map, &cpu_exclude_map, - per_cpu(cpu_sibling_mask, i)); - if ( !cnt || cnt[i] <= cnt[cpu] ) - cpu = i; - } - - xfree(cnt); - - return cpu; -} - bool_t domctl_lock_acquire(void) { /* @@ -687,7 +639,7 @@ continue; cpu = (i == 0) ? - default_vcpu0_location(online) : + cpumask_any(online) : cpumask_cycle(d->vcpu[i-1]->processor, online); if ( alloc_vcpu(d, i, cpu) == NULL ) diff -Nru xen-4.6.0/xen/common/efi/efi.h xen-4.6.5/xen/common/efi/efi.h --- xen-4.6.0/xen/common/efi/efi.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/efi/efi.h 2017-03-07 16:19:05.000000000 +0000 @@ -36,6 +36,3 @@ extern UINT64 efi_boot_max_var_store_size, efi_boot_remain_var_store_size, efi_boot_max_var_size; - -unsigned long efi_rs_enter(void); -void efi_rs_leave(unsigned long); diff -Nru xen-4.6.0/xen/common/efi/runtime.c xen-4.6.5/xen/common/efi/runtime.c --- xen-4.6.0/xen/common/efi/runtime.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/efi/runtime.c 2017-03-07 16:19:05.000000000 +0000 @@ -8,6 +8,25 @@ DEFINE_XEN_GUEST_HANDLE(CHAR16); +struct efi_rs_state { +#ifdef CONFIG_X86 + /* + * The way stacks get set up leads to them always being on an 8-byte + * boundary not evenly divisible by 16 (see asm-x86/current.h). The EFI ABI, + * just like the CPU one, however requires stacks to be 16-byte aligned + * before every function call. Since the compiler assumes this (unless + * passing it -mpreferred-stack-boundary=3), it wouldn't generate code to + * align the stack to 16 bytes even if putting a 16-byte aligned object + * there. Hence we need to force larger than 16-byte alignment, even if we + * don't strictly need that. 
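
The alignment fix above relies on a GCC property worth making explicit: over-aligning one member raises the alignment of the whole structure, so the compiler must realign the stack for any local instance. A small sketch, using the raw attribute rather than Xen's __aligned() shorthand:

    #include <stdalign.h>

    struct demo_state {
        unsigned long cr3 __attribute__((aligned(32)));
    };

    /* The member's alignment propagates to the containing struct... */
    _Static_assert(alignof(struct demo_state) == 32,
                   "struct inherits member alignment");

    /* ...so any on-stack instance forces a realigned stack frame. */
    void demo(void)
    {
        struct demo_state s = { 0 };
        (void)s;
    }
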
+ */ + unsigned long __aligned(32) cr3; +#endif +}; + +struct efi_rs_state efi_rs_enter(void); +void efi_rs_leave(struct efi_rs_state *); + #ifndef COMPAT #ifdef CONFIG_ARM /* Disabled until runtime services implemented */ @@ -52,17 +71,19 @@ const struct efi_pci_rom *__read_mostly efi_pci_roms; #ifndef CONFIG_ARM /* TODO - disabled until implemented on ARM */ -unsigned long efi_rs_enter(void) + +struct efi_rs_state efi_rs_enter(void) { static const u16 fcw = FCW_DEFAULT; static const u32 mxcsr = MXCSR_DEFAULT; - unsigned long cr3 = read_cr3(); + struct efi_rs_state state = { .cr3 = 0 }; if ( !efi_l4_pgtable ) - return 0; + return state; + state.cr3 = read_cr3(); save_fpu_enable(); - asm volatile ( "fldcw %0" :: "m" (fcw) ); + asm volatile ( "fnclex; fldcw %0" :: "m" (fcw) ); asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) ); spin_lock(&efi_rs_lock); @@ -85,14 +106,14 @@ write_cr3(virt_to_maddr(efi_l4_pgtable)); - return cr3; + return state; } -void efi_rs_leave(unsigned long cr3) +void efi_rs_leave(struct efi_rs_state *state) { - if ( !cr3 ) + if ( !state->cr3 ) return; - write_cr3(cr3); + write_cr3(state->cr3); if ( is_pv_vcpu(current) && !is_idle_vcpu(current) ) { struct desc_ptr gdt_desc = { @@ -119,14 +140,15 @@ { EFI_TIME time; EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(), flags; + struct efi_rs_state state = efi_rs_enter(); + unsigned long flags; - if ( !cr3 ) + if ( !state.cr3 ) return 0; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(&time, NULL); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( EFI_ERROR(status) ) return 0; @@ -138,12 +160,12 @@ void efi_halt_system(void) { EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return; status = efi_rs->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); - efi_rs_leave(cr3); + efi_rs_leave(&state); printk(XENLOG_WARNING "EFI: could not halt system (%#lx)\n", status); } @@ -151,13 +173,13 @@ void efi_reset_system(bool_t warm) { EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return; status = efi_rs->ResetSystem(warm ? 
EfiResetWarm : EfiResetCold, EFI_SUCCESS, 0, NULL); - efi_rs_leave(cr3); + efi_rs_leave(&state); printk(XENLOG_WARNING "EFI: could not reset system (%#lx)\n", status); } @@ -177,12 +199,12 @@ break; case XEN_FW_EFI_RT_VERSION: { - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return -EOPNOTSUPP; info->version = efi_rs->Hdr.Revision; - efi_rs_leave(cr3); + efi_rs_leave(&state); break; } case XEN_FW_EFI_CONFIG_TABLE: @@ -300,7 +322,8 @@ int efi_runtime_call(struct xenpf_efi_runtime_call *op) { - unsigned long cr3, flags; + struct efi_rs_state state; + unsigned long flags; EFI_STATUS status = EFI_NOT_STARTED; int rc = 0; @@ -313,13 +336,13 @@ if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( !EFI_ERROR(status) ) { @@ -335,13 +358,13 @@ if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetTime(cast_time(&op->u.set_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_get_wakeup_time: @@ -351,14 +374,14 @@ if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetWakeupTime(&enabled, &pending, cast_time(&op->u.get_wakeup_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( !EFI_ERROR(status) ) { @@ -375,8 +398,8 @@ XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY) ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetWakeupTime(!!(op->misc & @@ -386,7 +409,7 @@ NULL : cast_time(&op->u.set_wakeup_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); op->misc = 0; break; @@ -395,12 +418,12 @@ if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) status = efi_rs->GetNextHighMonotonicCount(&op->misc); else rc = -EOPNOTSUPP; - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_get_variable: @@ -434,13 +457,13 @@ else data = NULL; - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) { status = efi_rs->GetVariable( name, cast_guid(&op->u.get_variable.vendor_guid), &op->misc, &size, data); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( !EFI_ERROR(status) && copy_to_guest(op->u.get_variable.data, data, size) ) @@ -477,14 +500,14 @@ rc = -EFAULT; else { - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) status = efi_rs->SetVariable( name, cast_guid(&op->u.set_variable.vendor_guid), op->misc, op->u.set_variable.size, data); else rc = -EOPNOTSUPP; - efi_rs_leave(cr3); + efi_rs_leave(&state); } xfree(data); @@ -514,13 +537,13 @@ return -EFAULT; } - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) { status = efi_rs->GetNextVariableName( &size, name.str, cast_guid(&op->u.get_next_variable_name.vendor_guid)); - efi_rs_leave(cr3); + efi_rs_leave(&state); /* * Copy the variable 
name if necessary. The caller provided size @@ -569,10 +592,10 @@ break; } - cr3 = efi_rs_enter(); - if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) + state = efi_rs_enter(); + if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { - efi_rs_leave(cr3); + efi_rs_leave(&state); return -EOPNOTSUPP; } status = efi_rs->QueryVariableInfo( @@ -580,7 +603,7 @@ &op->u.query_variable_info.max_store_size, &op->u.query_variable_info.remain_store_size, &op->u.query_variable_info.max_size); - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_query_capsule_capabilities: @@ -588,13 +611,13 @@ if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) + state = efi_rs_enter(); + if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { - efi_rs_leave(cr3); + efi_rs_leave(&state); return -EOPNOTSUPP; } - efi_rs_leave(cr3); + efi_rs_leave(&state); /* XXX fall through for now */ default: return -ENOSYS; diff -Nru xen-4.6.0/xen/common/event_channel.c xen-4.6.5/xen/common/event_channel.c --- xen-4.6.0/xen/common/event_channel.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/event_channel.c 2017-03-07 16:19:05.000000000 +0000 @@ -170,7 +170,8 @@ { if ( port > d->max_evtchn_port ) return -ENOSPC; - if ( evtchn_from_port(d, port)->state == ECS_FREE ) + if ( evtchn_from_port(d, port)->state == ECS_FREE + && !evtchn_port_is_busy(d, port) ) return port; } diff -Nru xen-4.6.0/xen/common/event_fifo.c xen-4.6.5/xen/common/event_fifo.c --- xen-4.6.0/xen/common/event_fifo.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/event_fifo.c 2017-03-07 16:19:05.000000000 +0000 @@ -312,6 +312,17 @@ return test_bit(EVTCHN_FIFO_MASKED, word); } +static bool_t evtchn_fifo_is_busy(struct domain *d, evtchn_port_t port) +{ + event_word_t *word; + + word = evtchn_fifo_word_from_port(d, port); + if ( unlikely(!word) ) + return 0; + + return test_bit(EVTCHN_FIFO_LINKED, word); +} + static int evtchn_fifo_set_priority(struct domain *d, struct evtchn *evtchn, unsigned int priority) { @@ -351,6 +362,7 @@ .unmask = evtchn_fifo_unmask, .is_pending = evtchn_fifo_is_pending, .is_masked = evtchn_fifo_is_masked, + .is_busy = evtchn_fifo_is_busy, .set_priority = evtchn_fifo_set_priority, .print_state = evtchn_fifo_print_state, }; diff -Nru xen-4.6.0/xen/common/libelf/libelf-dominfo.c xen-4.6.5/xen/common/libelf/libelf-dominfo.c --- xen-4.6.0/xen/common/libelf/libelf-dominfo.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/libelf/libelf-dominfo.c 2017-03-07 16:19:05.000000000 +0000 @@ -532,7 +532,7 @@ if ( xen_elfnotes == 0 ) { count = elf_shdr_count(elf); - for ( i = 0; i < count; i++ ) + for ( i = 1; i < count; i++ ) { shdr = elf_shdr_by_index(elf, i); if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) diff -Nru xen-4.6.0/xen/common/libelf/libelf-loader.c xen-4.6.5/xen/common/libelf/libelf-loader.c --- xen-4.6.0/xen/common/libelf/libelf-loader.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/libelf/libelf-loader.c 2017-03-07 16:19:05.000000000 +0000 @@ -71,7 +71,7 @@ /* Find symbol table and symbol string table. 
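
The loop-start change just above (and the two identical ones in the following libelf hunks) rests on the ELF rule that section header index 0 is the reserved SHN_UNDEF dummy entry, not a real section. A standalone sketch of the same iteration, assuming a fully mapped ELF64 image:

    #include <elf.h>
    #include <stdio.h>

    /* Walk every real section of a fully mapped ELF64 image. */
    static void list_sections(const unsigned char *image)
    {
        const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)image;
        const Elf64_Shdr *shdr = (const Elf64_Shdr *)(image + ehdr->e_shoff);
        unsigned int i;

        /* Index 0 is the reserved SHN_UNDEF entry; start at 1. */
        for ( i = 1; i < ehdr->e_shnum; i++ )
            printf("section %u: type %#x size %#lx\n", i,
                   (unsigned int)shdr[i].sh_type,
                   (unsigned long)shdr[i].sh_size);
    }
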
*/ count = elf_shdr_count(elf); - for ( i = 0; i < count; i++ ) + for ( i = 1; i < count; i++ ) { shdr = elf_shdr_by_index(elf, i); if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) diff -Nru xen-4.6.0/xen/common/libelf/libelf-tools.c xen-4.6.5/xen/common/libelf/libelf-tools.c --- xen-4.6.0/xen/common/libelf/libelf-tools.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/libelf/libelf-tools.c 2017-03-07 16:19:05.000000000 +0000 @@ -154,7 +154,7 @@ const char *sname; unsigned i; - for ( i = 0; i < count; i++ ) + for ( i = 1; i < count; i++ ) { shdr = elf_shdr_by_index(elf, i); if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) diff -Nru xen-4.6.0/xen/common/memory.c xen-4.6.5/xen/common/memory.c --- xen-4.6.0/xen/common/memory.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/memory.c 2017-03-07 16:19:05.000000000 +0000 @@ -43,6 +43,52 @@ int preempted; /* Was the hypercall preempted? */ }; +#ifndef CONFIG_CTLDOM_MAX_ORDER +#define CONFIG_CTLDOM_MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER +#endif +#ifndef CONFIG_PTDOM_MAX_ORDER +#define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER +#endif + +static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER; +static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER; +static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER; +#ifdef HAS_PASSTHROUGH +static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER; +#endif +static void __init parse_max_order(const char *s) +{ + if ( *s != ',' ) + domu_max_order = simple_strtoul(s, &s, 0); + if ( *s == ',' && *++s != ',' ) + ctldom_max_order = simple_strtoul(s, &s, 0); + if ( *s == ',' && *++s != ',' ) + hwdom_max_order = simple_strtoul(s, &s, 0); +#ifdef HAS_PASSTHROUGH + if ( *s == ',' && *++s != ',' ) + ptdom_max_order = simple_strtoul(s, &s, 0); +#endif +} +custom_param("memop-max-order", parse_max_order); + +static unsigned int max_order(const struct domain *d) +{ + unsigned int order = domu_max_order; + +#ifdef HAS_PASSTHROUGH + if ( cache_flush_permitted(d) && order < ptdom_max_order ) + order = ptdom_max_order; +#endif + + if ( is_control_domain(d) && order < ctldom_max_order ) + order = ctldom_max_order; + + if ( is_hardware_domain(d) && order < hwdom_max_order ) + order = hwdom_max_order; + + return min(order, MAX_ORDER + 0U); +} + static void increase_reservation(struct memop_args *a) { struct page_info *page; @@ -55,7 +101,7 @@ a->nr_extents-1) ) return; - if ( !multipage_allocation_permitted(current->domain, a->extent_order) ) + if ( a->extent_order > max_order(current->domain) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) @@ -94,14 +140,14 @@ struct page_info *page; unsigned long i, j; xen_pfn_t gpfn, mfn; - struct domain *d = a->domain; + struct domain *d = a->domain, *curr_d = current->domain; if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) return; - if ( a->memflags & MEMF_populate_on_demand ? a->extent_order > MAX_ORDER : - !multipage_allocation_permitted(current->domain, a->extent_order) ) + if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER : + max_order(curr_d)) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) @@ -117,6 +163,10 @@ if ( a->memflags & MEMF_populate_on_demand ) { + /* Disallow populating PoD pages on oneself. 
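
For reference, the memop-max-order parser added above accepts up to four comma-separated orders (domU, control domain, hardware domain, passthrough domain), where an empty field keeps its default. A hosted-C sketch of the same format, with plain strtoul standing in for simple_strtoul and made-up defaults:

    #include <stdio.h>
    #include <stdlib.h>

    static void parse_orders(const char *s, unsigned int order[4])
    {
        char *end;
        unsigned int i;

        for ( i = 0; i < 4; i++ )
        {
            if ( *s && *s != ',' )          /* empty field keeps default */
            {
                order[i] = strtoul(s, &end, 0);
                s = end;
            }
            if ( *s != ',' )
                break;
            s++;                            /* step over the separator */
        }
    }

    int main(void)
    {
        unsigned int o[4] = { 2, 9, 12, 9 };    /* invented defaults */

        parse_orders("4,,16", o);               /* sets fields 0 and 2 only */
        printf("domU=%u ctldom=%u hwdom=%u ptdom=%u\n",
               o[0], o[1], o[2], o[3]);
        return 0;
    }
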
*/ + if ( d == curr_d ) + goto out; + if ( guest_physmap_mark_populate_on_demand(d, gpfn, a->extent_order) < 0 ) goto out; @@ -272,7 +322,7 @@ if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) || - a->extent_order > MAX_ORDER ) + a->extent_order > max_order(current->domain) ) return; for ( i = a->nr_done; i < a->nr_extents; i++ ) @@ -328,7 +378,7 @@ PAGE_LIST_HEAD(out_chunk_list); unsigned long in_chunk_order, out_chunk_order; xen_pfn_t gpfn, gmfn, mfn; - unsigned long i, j, k = 0; /* gcc ... */ + unsigned long i, j, k; unsigned int memflags = 0; long rc = 0; struct domain *d; @@ -337,13 +387,17 @@ if ( copy_from_guest(&exch, arg, 1) ) return -EFAULT; + if ( max(exch.in.extent_order, exch.out.extent_order) > + max_order(current->domain) ) + { + rc = -EPERM; + goto fail_early; + } + /* Various sanity checks. */ if ( (exch.nr_exchanged > exch.in.nr_extents) || /* Input and output domain identifiers match? */ (exch.in.domid != exch.out.domid) || - /* Extent orders are sensible? */ - (exch.in.extent_order > MAX_ORDER) || - (exch.out.extent_order > MAX_ORDER) || /* Sizes of input and output lists do not overflow a long? */ ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) || ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) || @@ -362,16 +416,6 @@ goto fail_early; } - /* Only privileged guests can allocate multi-page contiguous extents. */ - if ( !multipage_allocation_permitted(current->domain, - exch.in.extent_order) || - !multipage_allocation_permitted(current->domain, - exch.out.extent_order) ) - { - rc = -EPERM; - goto fail_early; - } - if ( exch.in.extent_order <= exch.out.extent_order ) { in_chunk_order = exch.out.extent_order - exch.in.extent_order; @@ -566,11 +610,12 @@ fail: /* Reassign any input pages we managed to steal. */ while ( (page = page_list_remove_head(&in_chunk_list)) ) - { - put_gfn(d, gmfn + k--); if ( assign_pages(d, page, 0, MEMF_no_refcount) ) - BUG(); - } + { + BUG_ON(!d->is_dying); + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) + put_page(page); + } dying: rcu_unlock_domain(d); diff -Nru xen-4.6.0/xen/common/page_alloc.c xen-4.6.5/xen/common/page_alloc.c --- xen-4.6.0/xen/common/page_alloc.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/page_alloc.c 2017-03-07 16:19:05.000000000 +0000 @@ -1268,16 +1268,7 @@ init_heap_pages(virt_to_page(bootmem_region_list), 1); if ( !dma_bitsize && (num_online_nodes() > 1) ) - { -#ifdef CONFIG_X86 - dma_bitsize = min_t(unsigned int, - flsl(NODE_DATA(0)->node_spanned_pages) - 1 - + PAGE_SHIFT - 2, - 32); -#else - dma_bitsize = 32; -#endif - } + dma_bitsize = arch_get_dma_bitsize(); printk("Domain heap initialised"); if ( dma_bitsize ) diff -Nru xen-4.6.0/xen/common/sched_credit2.c xen-4.6.5/xen/common/sched_credit2.c --- xen-4.6.0/xen/common/sched_credit2.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/sched_credit2.c 2017-03-07 16:19:05.000000000 +0000 @@ -268,6 +268,7 @@ uint16_t nr_vcpus; }; +static int csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc); /* * Time-to-credit, credit-to-time. @@ -870,28 +871,30 @@ { struct csched2_vcpu *svc = vc->sched_priv; struct csched2_dom * const sdom = svc->sdom; + spinlock_t *lock; printk("%s: Inserting %pv\n", __func__, vc); - /* NB: On boot, idle vcpus are inserted before alloc_pdata() has - * been called for that cpu. - */ - if ( ! is_idle_vcpu(vc) ) - { - spinlock_t *lock; + BUG_ON(is_idle_vcpu(vc)); - /* FIXME: Do we need the private lock here? 
*/ - list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); + /* FIXME: Do we need the private lock here? */ + list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); - /* Add vcpu to runqueue of initial processor */ - lock = vcpu_schedule_lock_irq(vc); + /* csched2_cpu_pick() expects the pcpu lock to be held */ + lock = vcpu_schedule_lock_irq(vc); - runq_assign(ops, vc); + vc->processor = csched2_cpu_pick(ops, vc); - vcpu_schedule_unlock_irq(lock, vc); + spin_unlock_irq(lock); - sdom->nr_vcpus++; - } + lock = vcpu_schedule_lock_irq(vc); + + /* Add vcpu to runqueue of initial processor */ + runq_assign(ops, vc); + + vcpu_schedule_unlock_irq(lock, vc); + + sdom->nr_vcpus++; CSCHED2_VCPU_CHECK(vc); } @@ -958,7 +961,7 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) { struct csched2_vcpu * const svc = CSCHED2_VCPU(vc); - s_time_t now = 0; + s_time_t now; /* Schedule lock should be held at this point. */ @@ -1015,8 +1018,8 @@ csched2_context_saved(const struct scheduler *ops, struct vcpu *vc) { struct csched2_vcpu * const svc = CSCHED2_VCPU(vc); - s_time_t now = NOW(); spinlock_t *lock = vcpu_schedule_lock_irq(vc); + s_time_t now = NOW(); BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor)); @@ -1209,6 +1212,7 @@ svc->migrate_rqd = trqd; set_bit(_VPF_migrating, &svc->vcpu->pause_flags); set_bit(__CSFLAG_runq_migrate_request, &svc->flags); + cpu_raise_softirq(svc->vcpu->processor, SCHEDULE_SOFTIRQ); SCHED_STAT_CRANK(migrate_requested); } else diff -Nru xen-4.6.0/xen/common/sched_credit.c xen-4.6.5/xen/common/sched_credit.c --- xen-4.6.0/xen/common/sched_credit.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/sched_credit.c 2017-03-07 16:19:05.000000000 +0000 @@ -50,6 +50,8 @@ /* Default timeslice: 30ms */ #define CSCHED_DEFAULT_TSLICE_MS 30 #define CSCHED_CREDITS_PER_MSEC 10 +/* Never set a timer shorter than this value. */ +#define CSCHED_MIN_TIMER XEN_SYSCTL_SCHED_RATELIMIT_MIN /* @@ -78,9 +80,6 @@ #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv) #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv) #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq)) -/* Is the first element of _cpu's runq its idle vcpu? */ -#define IS_RUNQ_IDLE(_cpu) (list_empty(RUNQ(_cpu)) || \ - is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu)) /* @@ -251,6 +250,18 @@ return list_entry(elem, struct csched_vcpu, runq_elem); } +/* Is the first element of cpu's runq (if any) cpu's idle vcpu? */ +static inline bool_t is_runq_idle(unsigned int cpu) +{ + /* + * We're peeking at cpu's runq, we must hold the proper lock. + */ + ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock)); + + return list_empty(RUNQ(cpu)) || + is_idle_vcpu(__runq_elem(RUNQ(cpu)->next)->vcpu); +} + static inline void __runq_insert(unsigned int cpu, struct csched_vcpu *svc) { @@ -453,11 +464,12 @@ if ( opt_tickle_one_idle ) { this_cpu(last_tickle_cpu) = - cpumask_cycle(this_cpu(last_tickle_cpu), &idle_mask); + cpumask_cycle(this_cpu(last_tickle_cpu), + csched_balance_mask(cpu)); __cpumask_set_cpu(this_cpu(last_tickle_cpu), &mask); } else - cpumask_or(&mask, &mask, &idle_mask); + cpumask_or(&mask, &mask, csched_balance_mask(cpu)); } /* Did we find anyone? */ @@ -696,7 +708,7 @@ * runnable vcpu on cpu, we add cpu to the idlers. 
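
is_runq_idle() above turns an implicit locking requirement into an executable check. The same pattern in portable C, with a plain atomic flag standing in for Xen's schedule lock (all names hypothetical):

    #include <assert.h>
    #include <stdatomic.h>

    struct runq {
        atomic_int locked;     /* nonzero while the queue lock is held */
        int nr_entries;
    };

    /*
     * Peeking at the queue is only safe under its lock; the assertion
     * documents the protocol and trips in debug builds if it is violated.
     */
    static int runq_is_empty(const struct runq *q)
    {
        assert(atomic_load(&q->locked));
        return q->nr_entries == 0;
    }
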
*/ cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers); - if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) ) + if ( vc->processor == cpu && is_runq_idle(cpu) ) __cpumask_set_cpu(cpu, &idlers); cpumask_and(&cpus, &cpus, &idlers); @@ -862,21 +874,33 @@ /* * Put this VCPU and domain back on the active list if it was * idling. - * - * If it's been active a while, check if we'd be better off - * migrating it to run elsewhere (see multi-core and multi-thread - * support in csched_cpu_pick()). */ if ( list_empty(&svc->active_vcpu_elem) ) { __csched_vcpu_acct_start(prv, svc); } - else if ( _csched_cpu_pick(ops, current, 0) != cpu ) + else { - SCHED_VCPU_STAT_CRANK(svc, migrate_r); - SCHED_STAT_CRANK(migrate_running); - set_bit(_VPF_migrating, &current->pause_flags); - cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + unsigned int new_cpu; + unsigned long flags; + spinlock_t *lock = vcpu_schedule_lock_irqsave(current, &flags); + + /* + * If it's been active a while, check if we'd be better off + * migrating it to run elsewhere (see multi-core and multi-thread + * support in csched_cpu_pick()). + */ + new_cpu = _csched_cpu_pick(ops, current, 0); + + vcpu_schedule_unlock_irqrestore(lock, flags, current); + + if ( new_cpu != cpu ) + { + SCHED_VCPU_STAT_CRANK(svc, migrate_r); + SCHED_STAT_CRANK(migrate_running); + set_bit(_VPF_migrating, &current->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } } } @@ -905,9 +929,23 @@ csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) { struct csched_vcpu *svc = vc->sched_priv; + spinlock_t *lock; + + BUG_ON( is_idle_vcpu(vc) ); + + /* csched_cpu_pick() looks in vc->processor's runq, so we need the lock. */ + lock = vcpu_schedule_lock_irq(vc); + + vc->processor = csched_cpu_pick(ops, vc); + + spin_unlock_irq(lock); + + lock = vcpu_schedule_lock_irq(vc); if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running ) __runq_insert(vc->processor, svc); + + vcpu_schedule_unlock_irq(lock, vc); } static void @@ -1081,6 +1119,7 @@ prv->ticks_per_tslice = 1; prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice; prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms; + prv->credit = prv->credits_per_tslice * prv->ncpus; } static int @@ -1090,6 +1129,7 @@ int rc = -EINVAL; xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit; struct csched_private *prv = CSCHED_PRIV(ops); + unsigned long flags; switch ( sc->cmd ) { @@ -1101,8 +1141,12 @@ || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN)) || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) ) goto out; + + spin_lock_irqsave(&prv->lock, flags); __csched_set_tslice(prv, params->tslice_ms); prv->ratelimit_us = params->ratelimit_us; + spin_unlock_irqrestore(&prv->lock, flags); + /* FALLTHRU */ case XEN_SYSCTL_SCHEDOP_getinfo: params->tslice_ms = prv->tslice_ms; @@ -1671,7 +1715,15 @@ snext = scurr; snext->start_time += now; perfc_incr(delay_ms); - tslice = MICROSECS(prv->ratelimit_us); + /* + * Next timeslice must last just until we'll have executed for + * ratelimit_us. However, to avoid setting a really short timer, which + * will most likely be inaccurate and counterproductive, we never go + * below CSCHED_MIN_TIMER.
+ */ + tslice = MICROSECS(prv->ratelimit_us) - runtime; + if ( unlikely(runtime < CSCHED_MIN_TIMER) ) + tslice = CSCHED_MIN_TIMER; ret.migrated = 0; goto out; } diff -Nru xen-4.6.0/xen/common/sched_rt.c xen-4.6.5/xen/common/sched_rt.c --- xen-4.6.0/xen/common/sched_rt.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/sched_rt.c 2017-03-07 16:19:05.000000000 +0000 @@ -187,6 +187,8 @@ struct domain *dom; /* pointer to upper domain */ }; +static int rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc); + /* * Useful inline functions */ @@ -621,17 +623,23 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu *svc = rt_vcpu(vc); - s_time_t now = NOW(); + s_time_t now; + spinlock_t *lock; - /* not addlocate idle vcpu to dom vcpu list */ - if ( is_idle_vcpu(vc) ) - return; + BUG_ON( is_idle_vcpu(vc) ); + /* This is safe because vc isn't yet being scheduled */ + vc->processor = rt_cpu_pick(ops, vc); + + lock = vcpu_schedule_lock_irq(vc); + + now = NOW(); if ( now >= svc->cur_deadline ) rt_update_deadline(now, svc); if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running ) __runq_insert(ops, svc); + vcpu_schedule_unlock_irq(lock, vc); /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); @@ -1032,7 +1040,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu * const svc = rt_vcpu(vc); - s_time_t now = NOW(); + s_time_t now; struct rt_private *prv = rt_priv(ops); struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ struct rt_dom *sdom = NULL; @@ -1068,6 +1076,7 @@ return; } + now = NOW(); if ( now >= svc->cur_deadline) rt_update_deadline(now, svc); diff -Nru xen-4.6.0/xen/common/schedule.c xen-4.6.5/xen/common/schedule.c --- xen-4.6.0/xen/common/schedule.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/schedule.c 2017-03-07 16:19:05.000000000 +0000 @@ -240,20 +240,22 @@ init_timer(&v->poll_timer, poll_timer_fn, v, v->processor); - /* Idle VCPUs are scheduled immediately. */ + v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv); + if ( v->sched_priv == NULL ) + return 1; + + TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id); + + /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */ if ( is_idle_domain(d) ) { per_cpu(schedule_data, v->processor).curr = v; v->is_running = 1; } - - TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id); - - v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv); - if ( v->sched_priv == NULL ) - return 1; - - SCHED_OP(DOM2OP(d), insert_vcpu, v); + else + { + SCHED_OP(DOM2OP(d), insert_vcpu, v); + } return 0; } @@ -1213,7 +1215,7 @@ static void schedule(void) { struct vcpu *prev = current, *next = NULL; - s_time_t now = NOW(); + s_time_t now; struct scheduler *sched; unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do); bool_t tasklet_work_scheduled = 0; @@ -1248,6 +1250,8 @@ lock = pcpu_schedule_lock_irq(cpu); + now = NOW(); + stop_timer(&sd->s_timer); /* get policy-specific decision on scheduling... 
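
A recurring shape in the scheduler hunks above is moving the NOW() read until after the lock is taken, so the timestamp cannot go stale while the caller waits for the lock. A self-contained sketch of that ordering, using a hypothetical pthread lock and monotonic clock in place of Xen's primitives:

    #include <pthread.h>
    #include <time.h>

    static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
    static long long deadline;

    static long long now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    static void update_deadline(long long period_ns)
    {
        pthread_mutex_lock(&sched_lock);
        /*
         * Sample the clock only while holding the lock: a sample taken
         * before pthread_mutex_lock() could be arbitrarily old by the
         * time it is used, skewing every calculation derived from it.
         */
        long long now = now_ns();
        deadline = now + period_ns;
        pthread_mutex_unlock(&sched_lock);
    }
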
*/ @@ -1488,7 +1492,6 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c) { - unsigned long flags; struct vcpu *idle; spinlock_t *lock; void *ppriv, *ppriv_old, *vpriv, *vpriv_old; @@ -1509,7 +1512,7 @@ return -ENOMEM; } - lock = pcpu_schedule_lock_irqsave(cpu, &flags); + lock = pcpu_schedule_lock_irq(cpu); SCHED_OP(old_ops, tick_suspend, cpu); vpriv_old = idle->sched_priv; @@ -1518,9 +1521,8 @@ ppriv_old = per_cpu(schedule_data, cpu).sched_priv; per_cpu(schedule_data, cpu).sched_priv = ppriv; SCHED_OP(new_ops, tick_resume, cpu); - SCHED_OP(new_ops, insert_vcpu, idle); - pcpu_schedule_unlock_irqrestore(lock, flags, cpu); + pcpu_schedule_unlock_irq(lock, cpu); SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); diff -Nru xen-4.6.0/xen/common/spinlock.c xen-4.6.5/xen/common/spinlock.c --- xen-4.6.0/xen/common/spinlock.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/spinlock.c 2017-03-07 16:19:05.000000000 +0000 @@ -141,7 +141,7 @@ while ( tickets.tail != observe_head(&lock->tickets) ) { LOCK_PROFILE_BLOCK; - cpu_relax(); + arch_lock_relax(); } LOCK_PROFILE_GOT; preempt_disable(); @@ -170,6 +170,7 @@ preempt_enable(); LOCK_PROFILE_REL; add_sized(&lock->tickets.head, 1); + arch_lock_signal(); } void _spin_unlock_irq(spinlock_t *lock) @@ -228,7 +229,7 @@ if ( sample.head != sample.tail ) { while ( observe_head(&lock->tickets) == sample.head ) - cpu_relax(); + arch_lock_relax(); #ifdef LOCK_PROFILE if ( lock->profile ) { diff -Nru xen-4.6.0/xen/common/trace.c xen-4.6.5/xen/common/trace.c --- xen-4.6.0/xen/common/trace.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/trace.c 2017-03-07 16:19:05.000000000 +0000 @@ -148,8 +148,12 @@ pages = max_pages; } - t_info_words = num_online_cpus() * pages * sizeof(uint32_t); - t_info_pages = PFN_UP(t_info_first_offset + t_info_words); + /* + * NB this calculation is correct, because t_info_first_offset is + * in words, not bytes + */ + t_info_words = num_online_cpus() * pages + t_info_first_offset; + t_info_pages = PFN_UP(t_info_words * sizeof(uint32_t)); printk(XENLOG_INFO "xentrace: requesting %u t_info pages " "for %u trace pages on %u cpus\n", t_info_pages, pages, num_online_cpus()); diff -Nru xen-4.6.0/xen/common/xenoprof.c xen-4.6.5/xen/common/xenoprof.c --- xen-4.6.0/xen/common/xenoprof.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/common/xenoprof.c 2017-03-07 16:19:05.000000000 +0000 @@ -239,6 +239,7 @@ d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages), 0); if ( d->xenoprof->rawbuf == NULL ) { + xfree(d->xenoprof->vcpu); xfree(d->xenoprof); d->xenoprof = NULL; return -ENOMEM; @@ -286,6 +287,7 @@ free_xenheap_pages(x->rawbuf, order); } + xfree(x->vcpu); xfree(x); d->xenoprof = NULL; } @@ -674,15 +676,13 @@ if ( (op < 0) || (op > XENOPROF_last_op) ) { - printk("xenoprof: invalid operation %d for domain %d\n", - op, current->domain->domain_id); + gdprintk(XENLOG_DEBUG, "invalid operation %d\n", op); return -EINVAL; } if ( !NONPRIV_OP(op) && (current->domain != xenoprof_primary_profiler) ) { - printk("xenoprof: dom %d denied privileged operation %d\n", - current->domain->domain_id, op); + gdprintk(XENLOG_DEBUG, "denied privileged operation %d\n", op); return -EPERM; } @@ -905,8 +905,7 @@ spin_unlock(&xenoprof_lock); if ( ret < 0 ) - printk("xenoprof: operation %d failed for dom %d (status : %d)\n", - op, current->domain->domain_id, ret); + gdprintk(XENLOG_DEBUG, "operation %d failed: %d\n", op, ret); return ret; } diff -Nru
xen-4.6.0/xen/drivers/acpi/numa.c xen-4.6.5/xen/drivers/acpi/numa.c --- xen-4.6.0/xen/drivers/acpi/numa.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/acpi/numa.c 2017-03-07 16:19:05.000000000 +0000 @@ -198,9 +198,9 @@ /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, NR_CPUS); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, NR_CPUS); + acpi_parse_processor_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); diff -Nru xen-4.6.0/xen/drivers/char/cadence-uart.c xen-4.6.5/xen/drivers/char/cadence-uart.c --- xen-4.6.0/xen/drivers/char/cadence-uart.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/char/cadence-uart.c 2017-03-07 16:19:05.000000000 +0000 @@ -206,6 +206,7 @@ static const struct dt_device_match cuart_dt_match[] __initconst = { DT_MATCH_COMPATIBLE("cdns,uart-r1p8"), + DT_MATCH_COMPATIBLE("cdns,uart-r1p12"), { /* sentinel */ }, }; diff -Nru xen-4.6.0/xen/drivers/char/serial.c xen-4.6.5/xen/drivers/char/serial.c --- xen-4.6.0/xen/drivers/char/serial.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/char/serial.c 2017-03-07 16:19:05.000000000 +0000 @@ -310,7 +310,7 @@ goto common; } - if ( !strncmp(conf, "dtuart", 5) ) + if ( !strncmp(conf, "dtuart", 6) ) { handle = SERHND_DTUART; goto common; diff -Nru xen-4.6.0/xen/drivers/passthrough/amd/iommu_cmd.c xen-4.6.5/xen/drivers/passthrough/amd/iommu_cmd.c --- xen-4.6.0/xen/drivers/passthrough/amd/iommu_cmd.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/amd/iommu_cmd.c 2017-03-07 16:19:05.000000000 +0000 @@ -18,7 +18,6 @@ */ #include -#include #include #include #include "../ats.h" diff -Nru xen-4.6.0/xen/drivers/passthrough/amd/iommu_guest.c xen-4.6.5/xen/drivers/passthrough/amd/iommu_guest.c --- xen-4.6.0/xen/drivers/passthrough/amd/iommu_guest.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/amd/iommu_guest.c 2017-03-07 16:19:05.000000000 +0000 @@ -18,7 +18,6 @@ #include #include -#include #include #include @@ -59,12 +58,12 @@ static inline struct guest_iommu *domain_iommu(struct domain *d) { - return domain_hvm_iommu(d)->arch.g_iommu; + return dom_iommu(d)->arch.g_iommu; } static inline struct guest_iommu *vcpu_iommu(struct vcpu *v) { - return domain_hvm_iommu(v->domain)->arch.g_iommu; + return dom_iommu(v->domain)->arch.g_iommu; } static void guest_iommu_enable(struct guest_iommu *iommu) @@ -885,7 +884,7 @@ int guest_iommu_init(struct domain* d) { struct guest_iommu *iommu; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); if ( !is_hvm_domain(d) || !iommu_enabled || !iommuv2_enabled ) return 0; @@ -923,5 +922,5 @@ tasklet_kill(&iommu->cmd_buffer_tasklet); xfree(iommu); - domain_hvm_iommu(d)->arch.g_iommu = NULL; + dom_iommu(d)->arch.g_iommu = NULL; } diff -Nru xen-4.6.0/xen/drivers/passthrough/amd/iommu_intr.c xen-4.6.5/xen/drivers/passthrough/amd/iommu_intr.c --- xen-4.6.0/xen/drivers/passthrough/amd/iommu_intr.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/amd/iommu_intr.c 2017-03-07 16:19:05.000000000 +0000 @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff -Nru xen-4.6.0/xen/drivers/passthrough/amd/iommu_map.c xen-4.6.5/xen/drivers/passthrough/amd/iommu_map.c --- 
xen-4.6.0/xen/drivers/passthrough/amd/iommu_map.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/amd/iommu_map.c 2017-03-07 16:19:05.000000000 +0000 @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include "../ats.h" @@ -340,7 +339,7 @@ unsigned long first_mfn; u64 *table, *pde, *ntable; u64 ntable_maddr, mask; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); bool_t ok = 0; ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn ); @@ -395,7 +394,7 @@ u64 *table, *pde, *ntable; u64 ntable_mfn; unsigned long first_mfn; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn ); @@ -445,7 +444,7 @@ unsigned long next_table_mfn; unsigned int level; struct page_info *table; - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); table = hd->arch.root_table; level = hd->arch.paging_mode; @@ -554,7 +553,7 @@ struct page_info *old_root = NULL; void *new_root_vaddr; unsigned long old_root_mfn; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); if ( gfn == INVALID_MFN ) return -EADDRNOTAVAIL; @@ -637,7 +636,7 @@ unsigned int flags) { bool_t need_flush = 0; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); unsigned long pt_mfn[7]; unsigned int merge_level; @@ -717,7 +716,7 @@ int amd_iommu_unmap_page(struct domain *d, unsigned long gfn) { unsigned long pt_mfn[7]; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); BUG_ON( !hd->arch.root_table ); @@ -787,7 +786,7 @@ /* Share p2m table with iommu. */ void amd_iommu_share_p2m(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); struct page_info *p2m_table; mfn_t pgd_mfn; diff -Nru xen-4.6.0/xen/drivers/passthrough/amd/pci_amd_iommu.c xen-4.6.5/xen/drivers/passthrough/amd/pci_amd_iommu.c --- xen-4.6.0/xen/drivers/passthrough/amd/pci_amd_iommu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/amd/pci_amd_iommu.c 2017-03-07 16:19:05.000000000 +0000 @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include "../ats.h" @@ -117,8 +116,7 @@ int req_id, valid = 1; int dte_i = 0; u8 bus = pdev->bus; - - struct hvm_iommu *hd = domain_hvm_iommu(domain); + const struct domain_iommu *hd = dom_iommu(domain); BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode || !iommu->dev_table.buffer ); @@ -224,7 +222,7 @@ return scan_pci_devices(); } -static int allocate_domain_resources(struct hvm_iommu *hd) +static int allocate_domain_resources(struct domain_iommu *hd) { /* allocate root table */ spin_lock(&hd->arch.mapping_lock); @@ -259,7 +257,7 @@ static int amd_iommu_domain_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); /* allocate page directroy */ if ( allocate_domain_resources(hd) != 0 ) @@ -341,7 +339,7 @@ AMD_IOMMU_DEBUG("Disable: device id = %#x, " "domain = %d, paging mode = %d\n", req_id, domain->domain_id, - domain_hvm_iommu(domain)->arch.paging_mode); + dom_iommu(domain)->arch.paging_mode); } spin_unlock_irqrestore(&iommu->lock, flags); @@ -358,7 +356,7 @@ { struct amd_iommu *iommu; int bdf; - struct hvm_iommu *t = domain_hvm_iommu(target); + struct domain_iommu *t = dom_iommu(target); bdf = PCI_BDF2(pdev->bus, pdev->devfn); iommu = find_iommu_for_device(pdev->seg, bdf); 
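
The hvm_iommu to domain_iommu conversion above is mostly mechanical, but it also lets read-only call sites take a const-qualified view of the per-domain IOMMU state. A sketch of the accessor pattern, with an invented field layout:

    struct domain_iommu {
        int paging_mode;
        void *root_table;
    };

    struct domain {
        struct domain_iommu iommu;
    };

    /* One accessor; callers that only read bind the result to a const view. */
    static inline struct domain_iommu *dom_iommu(struct domain *d)
    {
        return &d->iommu;
    }

    static int get_paging_mode(struct domain *d)
    {
        const struct domain_iommu *hd = dom_iommu(d); /* read-only use */
        return hd->paging_mode;
    }
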
@@ -459,7 +457,7 @@ static void deallocate_iommu_page_tables(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); if ( iommu_use_hap_pt(d) ) return; @@ -599,7 +597,7 @@ static void amd_dump_p2m_table(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); if ( !hd->arch.root_table ) return; diff -Nru xen-4.6.0/xen/drivers/passthrough/arm/smmu.c xen-4.6.5/xen/drivers/passthrough/arm/smmu.c --- xen-4.6.0/xen/drivers/passthrough/arm/smmu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/arm/smmu.c 2017-03-07 16:19:05.000000000 +0000 @@ -2546,7 +2546,7 @@ static void arm_smmu_iotlb_flush_all(struct domain *d) { - struct arm_smmu_xen_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv; + struct arm_smmu_xen_domain *smmu_domain = dom_iommu(d)->arch.priv; struct iommu_domain *cfg; spin_lock(&smmu_domain->lock); @@ -2577,7 +2577,7 @@ struct arm_smmu_xen_domain *xen_domain; struct arm_smmu_device *smmu; - xen_domain = domain_hvm_iommu(d)->arch.priv; + xen_domain = dom_iommu(d)->arch.priv; smmu = find_smmu_for_device(dev); if (!smmu) @@ -2610,7 +2610,7 @@ struct arm_smmu_xen_domain *xen_domain; int ret = 0; - xen_domain = domain_hvm_iommu(d)->arch.priv; + xen_domain = dom_iommu(d)->arch.priv; if (!dev->archdata.iommu) { dev->archdata.iommu = xzalloc(struct arm_smmu_xen_device); @@ -2671,7 +2671,7 @@ struct iommu_domain *domain = dev_iommu_domain(dev); struct arm_smmu_xen_domain *xen_domain; - xen_domain = domain_hvm_iommu(d)->arch.priv; + xen_domain = dom_iommu(d)->arch.priv; if (!domain || domain->priv->cfg.domain != d) { dev_err(dev, " not attached to domain %d\n", d->domain_id); @@ -2728,7 +2728,7 @@ spin_lock_init(&xen_domain->lock); INIT_LIST_HEAD(&xen_domain->contexts); - domain_hvm_iommu(d)->arch.priv = xen_domain; + dom_iommu(d)->arch.priv = xen_domain; /* Coherent walk can be enabled only when all SMMUs support it. 
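
The coherent-walk rule stated above is the usual capability-intersection pattern: a feature is only usable domain-wide if every unit supports it. A minimal sketch (flag names invented):

    #include <stdint.h>

    #define FEAT_COHERENT_WALK  (1u << 0)
    #define FEAT_STALL          (1u << 1)

    static uint32_t platform_features = ~0u;   /* start from "everything" */

    /* Intersect as units are discovered: drop whatever this unit lacks. */
    static void register_unit(uint32_t unit_features)
    {
        platform_features &= unit_features;
    }
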
*/ if (platform_features & ARM_SMMU_FEAT_COHERENT_WALK) @@ -2743,7 +2743,7 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d) { - struct arm_smmu_xen_domain *xen_domain = domain_hvm_iommu(d)->arch.priv; + struct arm_smmu_xen_domain *xen_domain = dom_iommu(d)->arch.priv; ASSERT(list_empty(&xen_domain->contexts)); xfree(xen_domain); diff -Nru xen-4.6.0/xen/drivers/passthrough/device_tree.c xen-4.6.5/xen/drivers/passthrough/device_tree.c --- xen-4.6.0/xen/drivers/passthrough/device_tree.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/device_tree.c 2017-03-07 16:19:05.000000000 +0000 @@ -27,7 +27,7 @@ int iommu_assign_dt_device(struct domain *d, struct dt_device_node *dev) { int rc = -EBUSY; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); if ( !iommu_enabled || !hd->platform_ops ) return -EINVAL; @@ -69,7 +69,7 @@ int iommu_deassign_dt_device(struct domain *d, struct dt_device_node *dev) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); int rc; if ( !iommu_enabled || !hd->platform_ops ) @@ -109,16 +109,14 @@ int iommu_dt_domain_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); - - INIT_LIST_HEAD(&hd->dt_devices); + INIT_LIST_HEAD(&dom_iommu(d)->dt_devices); return 0; } int iommu_release_dt_devices(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct dt_device_node *dev, *_dev; int rc; diff -Nru xen-4.6.0/xen/drivers/passthrough/io.c xen-4.6.5/xen/drivers/passthrough/io.c --- xen-4.6.0/xen/drivers/passthrough/io.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/io.c 2017-03-07 16:19:05.000000000 +0000 @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -422,10 +421,10 @@ spin_unlock(&d->event_lock); if ( iommu_verbose ) - dprintk(XENLOG_G_INFO, - "d%d: bind: m_gsi=%u g_gsi=%u dev=%02x.%02x.%u intx=%u\n", - d->domain_id, pirq, guest_gsi, bus, - PCI_SLOT(device), PCI_FUNC(device), intx); + printk(XENLOG_G_INFO + "d%d: bind: m_gsi=%u g_gsi=%u dev=%02x.%02x.%u intx=%u\n", + d->domain_id, pirq, guest_gsi, bus, + PCI_SLOT(device), PCI_FUNC(device), intx); break; } @@ -455,11 +454,11 @@ unsigned int device = pt_irq_bind->u.pci.device; unsigned int intx = pt_irq_bind->u.pci.intx; - dprintk(XENLOG_G_INFO, - "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n", - d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx), - pt_irq_bind->u.pci.bus, - PCI_SLOT(device), PCI_FUNC(device), intx); + printk(XENLOG_G_INFO + "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n", + d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx), + pt_irq_bind->u.pci.bus, + PCI_SLOT(device), PCI_FUNC(device), intx); } break; case PT_IRQ_TYPE_MSI: @@ -555,10 +554,10 @@ { unsigned int device = pt_irq_bind->u.pci.device; - dprintk(XENLOG_G_INFO, - "d%d %s unmap: m_irq=%u dev=%02x:%02x.%u intx=%u\n", - d->domain_id, what, machine_gsi, pt_irq_bind->u.pci.bus, - PCI_SLOT(device), PCI_FUNC(device), pt_irq_bind->u.pci.intx); + printk(XENLOG_G_INFO + "d%d %s unmap: m_irq=%u dev=%02x:%02x.%u intx=%u\n", + d->domain_id, what, machine_gsi, pt_irq_bind->u.pci.bus, + PCI_SLOT(device), PCI_FUNC(device), pt_irq_bind->u.pci.intx); } return 0; diff -Nru xen-4.6.0/xen/drivers/passthrough/iommu.c xen-4.6.5/xen/drivers/passthrough/iommu.c --- xen-4.6.0/xen/drivers/passthrough/iommu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/iommu.c 
2017-03-07 16:19:05.000000000 +0000 @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -118,7 +117,7 @@ int iommu_domain_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); int ret = 0; ret = arch_iommu_domain_init(d); @@ -148,7 +147,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); check_hwdom_reqs(d); @@ -182,7 +181,7 @@ void iommu_teardown(struct domain *d) { - const struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); d->need_iommu = 0; hd->platform_ops->teardown(d); @@ -217,13 +216,10 @@ void iommu_domain_destroy(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops ) + if ( !iommu_enabled || !dom_iommu(d)->platform_ops ) return; - if ( need_iommu(d) ) - iommu_teardown(d); + iommu_teardown(d); arch_iommu_domain_destroy(d); } @@ -231,7 +227,7 @@ int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); if ( !iommu_enabled || !hd->platform_ops ) return 0; @@ -241,7 +237,7 @@ int iommu_unmap_page(struct domain *d, unsigned long gfn) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); if ( !iommu_enabled || !hd->platform_ops ) return 0; @@ -268,7 +264,7 @@ void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush ) return; @@ -278,7 +274,7 @@ void iommu_iotlb_flush_all(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush_all ) return; @@ -389,12 +385,10 @@ bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature) { - const struct hvm_iommu *hd = domain_hvm_iommu(d); - if ( !iommu_enabled ) return 0; - return test_bit(feature, hd->features); + return test_bit(feature, dom_iommu(d)->features); } static void iommu_dump_p2m_table(unsigned char key) diff -Nru xen-4.6.0/xen/drivers/passthrough/pci.c xen-4.6.5/xen/drivers/passthrough/pci.c --- xen-4.6.0/xen/drivers/passthrough/pci.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/pci.c 2017-03-07 16:19:05.000000000 +0000 @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -1241,7 +1240,7 @@ int iommu_add_device(struct pci_dev *pdev) { - struct hvm_iommu *hd; + const struct domain_iommu *hd; int rc; u8 devfn; @@ -1250,7 +1249,7 @@ ASSERT(spin_is_locked(&pcidevs_lock)); - hd = domain_hvm_iommu(pdev->domain); + hd = dom_iommu(pdev->domain); if ( !iommu_enabled || !hd->platform_ops ) return 0; @@ -1272,14 +1271,14 @@ int iommu_enable_device(struct pci_dev *pdev) { - struct hvm_iommu *hd; + const struct domain_iommu *hd; if ( !pdev->domain ) return -EINVAL; ASSERT(spin_is_locked(&pcidevs_lock)); - hd = domain_hvm_iommu(pdev->domain); + hd = dom_iommu(pdev->domain); if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->enable_device ) return 0; @@ -1289,13 +1288,13 @@ int iommu_remove_device(struct pci_dev *pdev) { - struct hvm_iommu *hd; + const struct domain_iommu *hd; u8 devfn; if ( 
!pdev->domain ) return -EINVAL; - hd = domain_hvm_iommu(pdev->domain); + hd = dom_iommu(pdev->domain); if ( !iommu_enabled || !hd->platform_ops ) return 0; @@ -1335,7 +1334,7 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct pci_dev *pdev; int rc = 0; @@ -1395,7 +1394,7 @@ /* caller should hold the pcidevs_lock */ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct pci_dev *pdev = NULL; int ret = 0; @@ -1445,7 +1444,7 @@ struct domain *d, u16 seg, u8 bus, u8 devfn, XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct pci_dev *pdev; int group_id, sdev_id; u32 bdf; diff -Nru xen-4.6.0/xen/drivers/passthrough/vtd/dmar.c xen-4.6.5/xen/drivers/passthrough/vtd/dmar.c --- xen-4.6.0/xen/drivers/passthrough/vtd/dmar.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/vtd/dmar.c 2017-03-07 16:19:05.000000000 +0000 @@ -298,7 +298,7 @@ scope = start; if ( scope->length < MIN_SCOPE_LEN ) { - dprintk(XENLOG_WARNING VTDPREFIX, "Invalid device scope.\n"); + printk(XENLOG_WARNING VTDPREFIX "Invalid device scope\n"); return -EINVAL; } @@ -360,18 +360,18 @@ sub_bus = pci_conf_read8(seg, bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); if ( iommu_verbose ) - dprintk(VTDPREFIX, - " bridge: %04x:%02x:%02x.%u start=%x sec=%x sub=%x\n", - seg, bus, path->dev, path->fn, - acpi_scope->bus, sec_bus, sub_bus); + printk(VTDPREFIX + " bridge: %04x:%02x:%02x.%u start=%x sec=%x sub=%x\n", + seg, bus, path->dev, path->fn, + acpi_scope->bus, sec_bus, sub_bus); dmar_scope_add_buses(scope, sec_bus, sub_bus); break; case ACPI_DMAR_SCOPE_TYPE_HPET: if ( iommu_verbose ) - dprintk(VTDPREFIX, " MSI HPET: %04x:%02x:%02x.%u\n", - seg, bus, path->dev, path->fn); + printk(VTDPREFIX " MSI HPET: %04x:%02x:%02x.%u\n", + seg, bus, path->dev, path->fn); if ( drhd ) { @@ -392,8 +392,8 @@ case ACPI_DMAR_SCOPE_TYPE_ENDPOINT: if ( iommu_verbose ) - dprintk(VTDPREFIX, " endpoint: %04x:%02x:%02x.%u\n", - seg, bus, path->dev, path->fn); + printk(VTDPREFIX " endpoint: %04x:%02x:%02x.%u\n", + seg, bus, path->dev, path->fn); if ( drhd ) { @@ -406,8 +406,8 @@ case ACPI_DMAR_SCOPE_TYPE_IOAPIC: if ( iommu_verbose ) - dprintk(VTDPREFIX, " IOAPIC: %04x:%02x:%02x.%u\n", - seg, bus, path->dev, path->fn); + printk(VTDPREFIX " IOAPIC: %04x:%02x:%02x.%u\n", + seg, bus, path->dev, path->fn); if ( drhd ) { @@ -449,9 +449,8 @@ { if ( h->length >= min_len ) return 0; - dprintk(XENLOG_ERR VTDPREFIX, - "Invalid ACPI DMAR entry length: %#x\n", - h->length); + printk(XENLOG_ERR VTDPREFIX "Invalid ACPI DMAR entry length: %#x\n", + h->length); return -EINVAL; } @@ -481,8 +480,7 @@ INIT_LIST_HEAD(&dmaru->ioapic_list); INIT_LIST_HEAD(&dmaru->hpet_list); if ( iommu_verbose ) - dprintk(VTDPREFIX, " dmaru->address = %"PRIx64"\n", - dmaru->address); + printk(VTDPREFIX " dmaru->address = %"PRIx64"\n", dmaru->address); ret = iommu_alloc(dmaru); if ( ret ) @@ -496,12 +494,12 @@ if ( dmaru->include_all ) { if ( iommu_verbose ) - dprintk(VTDPREFIX, " flags: INCLUDE_ALL\n"); + printk(VTDPREFIX " flags: INCLUDE_ALL\n"); /* Only allow one INCLUDE_ALL */ if ( drhd->segment == 0 && include_all ) { - dprintk(XENLOG_WARNING VTDPREFIX, - "Only one INCLUDE_ALL device scope is allowed\n"); + printk(XENLOG_WARNING VTDPREFIX 
+ "Only one INCLUDE_ALL device scope is allowed\n"); ret = -EINVAL; } if ( drhd->segment == 0 ) @@ -538,9 +536,9 @@ if ( !pci_device_detect(drhd->segment, b, d, f) ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " Non-existent device (%04x:%02x:%02x.%u) is reported" - " in this DRHD's scope!\n", drhd->segment, b, d, f); + printk(XENLOG_WARNING VTDPREFIX + " Non-existent device (%04x:%02x:%02x.%u) in this DRHD's scope!\n", + drhd->segment, b, d, f); invalid_cnt++; } } @@ -550,9 +548,8 @@ if ( iommu_workaround_bios_bug && invalid_cnt == dmaru->scope.devices_cnt ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " Workaround BIOS bug: ignore the DRHD due to all " - "devices under its scope are not PCI discoverable!\n"); + printk(XENLOG_WARNING VTDPREFIX + " Workaround BIOS bug: ignoring DRHD (no devices in its scope are PCI discoverable)\n"); scope_devices_free(&dmaru->scope); iommu_free(dmaru); @@ -560,11 +557,10 @@ } else { - dprintk(XENLOG_WARNING VTDPREFIX, - " The DRHD is invalid due to there are devices under " - "its scope are not PCI discoverable! Pls try option " - "iommu=force or iommu=workaround_bios_bug if you " - "really want VT-d\n"); + printk(XENLOG_WARNING VTDPREFIX + " DRHD is invalid (some devices in its scope are not PCI discoverable)\n"); + printk(XENLOG_WARNING VTDPREFIX + " Try \"iommu=force\" or \"iommu=workaround_bios_bug\" if you really want VT-d\n"); ret = -EINVAL; } } @@ -613,10 +609,9 @@ if ( (!page_is_ram_type(paddr_to_pfn(base_addr), RAM_TYPE_RESERVED)) || (!page_is_ram_type(paddr_to_pfn(end_addr), RAM_TYPE_RESERVED)) ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " RMRR address range not in reserved memory " - "base = %"PRIx64" end = %"PRIx64"; " - "iommu_inclusive_mapping=1 parameter may be needed.\n", + printk(XENLOG_WARNING VTDPREFIX + " RMRR address range %"PRIx64"..%"PRIx64" not in reserved memory;" + " need \"iommu_inclusive_mapping=1\"?\n", base_addr, end_addr); } @@ -653,11 +648,10 @@ if ( !pci_device_detect(rmrr->segment, b, d, f) ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " Non-existent device (%04x:%02x:%02x.%u) is reported" - " in RMRR (%"PRIx64", %"PRIx64")'s scope!\n", - rmrr->segment, b, d, f, - rmrru->base_address, rmrru->end_address); + printk(XENLOG_WARNING VTDPREFIX + " Non-existent device (%04x:%02x:%02x.%u) reported in RMRR (%"PRIx64", %"PRIx64")'s scope!\n", + rmrr->segment, b, d, f, + rmrru->base_address, rmrru->end_address); ignore = 1; } else @@ -669,18 +663,17 @@ if ( ignore ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " Ignore the RMRR (%"PRIx64", %"PRIx64") due to " - "devices under its scope are not PCI discoverable!\n", - rmrru->base_address, rmrru->end_address); + printk(XENLOG_WARNING VTDPREFIX + " Ignore RMRR (%"PRIx64", %"PRIx64") (some devices in its scope are not PCI discoverable)\n", + rmrru->base_address, rmrru->end_address); scope_devices_free(&rmrru->scope); xfree(rmrru); } else if ( base_addr > end_addr ) { - dprintk(XENLOG_WARNING VTDPREFIX, - " The RMRR (%"PRIx64", %"PRIx64") is incorrect!\n", - rmrru->base_address, rmrru->end_address); + printk(XENLOG_WARNING VTDPREFIX + " RMRR (%"PRIx64", %"PRIx64") is incorrect\n", + rmrru->base_address, rmrru->end_address); scope_devices_free(&rmrru->scope); xfree(rmrru); ret = -EFAULT; @@ -688,10 +681,9 @@ else { if ( iommu_verbose ) - dprintk(VTDPREFIX, - " RMRR region: base_addr %"PRIx64 - " end_address %"PRIx64"\n", - rmrru->base_address, rmrru->end_address); + printk(VTDPREFIX + " RMRR region: base_addr %"PRIx64" end_address %"PRIx64"\n", + rmrru->base_address, rmrru->end_address); 
acpi_register_rmrr_unit(rmrru); } } @@ -719,8 +711,7 @@ atsru->segment = atsr->segment; atsru->all_ports = atsr->flags & ACPI_DMAR_ALL_PORTS; if ( iommu_verbose ) - dprintk(VTDPREFIX, - " atsru->all_ports: %x\n", atsru->all_ports); + printk(VTDPREFIX " atsru->all_ports: %x\n", atsru->all_ports); if ( !atsru->all_ports ) { dev_scope_start = (void *)(atsr + 1); @@ -731,12 +722,12 @@ else { if ( iommu_verbose ) - dprintk(VTDPREFIX, " flags: ALL_PORTS\n"); + printk(VTDPREFIX " flags: ALL_PORTS\n"); /* Only allow one ALL_PORTS */ if ( atsr->segment == 0 && all_ports ) { - dprintk(XENLOG_WARNING VTDPREFIX, - "Only one ALL_PORTS device scope is allowed\n"); + printk(XENLOG_WARNING VTDPREFIX + "Only one ALL_PORTS device scope is allowed\n"); ret = -EINVAL; } if ( atsr->segment == 0 ) @@ -772,10 +763,9 @@ rhsau->proximity_domain = rhsa->proximity_domain; list_add_tail(&rhsau->list, &acpi_rhsa_units); if ( iommu_verbose ) - dprintk(VTDPREFIX, - " rhsau->address: %"PRIx64 - " rhsau->proximity_domain: %"PRIx32"\n", - rhsau->address, rhsau->proximity_domain); + printk(VTDPREFIX + " rhsau->address: %"PRIx64" rhsau->proximity_domain: %"PRIx32"\n", + rhsau->address, rhsau->proximity_domain); return ret; } @@ -798,15 +788,14 @@ if ( !dmar->width ) { - dprintk(XENLOG_WARNING VTDPREFIX, "Zero: Invalid DMAR width\n"); + printk(XENLOG_WARNING VTDPREFIX "Zero: Invalid DMAR width\n"); ret = -EINVAL; goto out; } dmar_host_address_width = dmar->width + 1; if ( iommu_verbose ) - dprintk(VTDPREFIX, "Host address width %d\n", - dmar_host_address_width); + printk(VTDPREFIX "Host address width %d\n", dmar_host_address_width); entry_header = (void *)(dmar + 1); while ( ((unsigned long)entry_header) < @@ -820,22 +809,22 @@ { case ACPI_DMAR_TYPE_HARDWARE_UNIT: if ( iommu_verbose ) - dprintk(VTDPREFIX, "found ACPI_DMAR_DRHD:\n"); + printk(VTDPREFIX "found ACPI_DMAR_DRHD:\n"); ret = acpi_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: if ( iommu_verbose ) - dprintk(VTDPREFIX, "found ACPI_DMAR_RMRR:\n"); + printk(VTDPREFIX "found ACPI_DMAR_RMRR:\n"); ret = acpi_parse_one_rmrr(entry_header); break; case ACPI_DMAR_TYPE_ATSR: if ( iommu_verbose ) - dprintk(VTDPREFIX, "found ACPI_DMAR_ATSR:\n"); + printk(VTDPREFIX "found ACPI_DMAR_ATSR:\n"); ret = acpi_parse_one_atsr(entry_header); break; case ACPI_DMAR_HARDWARE_AFFINITY: if ( iommu_verbose ) - dprintk(VTDPREFIX, "found ACPI_DMAR_RHSA:\n"); + printk(VTDPREFIX "found ACPI_DMAR_RHSA:\n"); ret = acpi_parse_one_rhsa(entry_header); break; default: diff -Nru xen-4.6.0/xen/drivers/passthrough/vtd/intremap.c xen-4.6.5/xen/drivers/passthrough/vtd/intremap.c --- xen-4.6.0/xen/drivers/passthrough/vtd/intremap.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/vtd/intremap.c 2017-03-07 16:19:05.000000000 +0000 @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -143,10 +142,10 @@ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id)); } -int iommu_supports_eim(void) +bool_t iommu_supports_eim(void) { struct acpi_drhd_unit *drhd; - int apic; + unsigned int apic; if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) ) return 0; @@ -154,12 +153,12 @@ /* We MUST have a DRHD unit for each IOAPIC. 
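The intremap.c changes beginning above and continuing below do two separable things: iommu_supports_eim() becomes a proper bool_t predicate with an unsigned loop counter, and iommu_enable_x2apic_IR() stops collapsing every failure into a bare -1, returning distinct errno values instead. That lets a caller tell "this platform cannot do x2APIC interrupt remapping" apart from "the hardware refused to enable it". A hedged sketch of the calling pattern this enables; the fallback function name is illustrative only:

    int rc = iommu_enable_x2apic_IR();

    if ( rc == -EOPNOTSUPP || rc == -ENXIO )
        fall_back_to_xapic();       /* expected outcome on older platforms */
    else if ( rc == -EIO )
        printk(XENLOG_ERR "x2APIC IR: hardware enable failed (%d)\n", rc);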
*/ for ( apic = 0; apic < nr_ioapics; apic++ ) if ( !ioapic_to_drhd(IO_APIC_ID(apic)) ) - { + { dprintk(XENLOG_WARNING VTDPREFIX, "There is not a DRHD for IOAPIC %#x (id: %#x)!\n", apic, IO_APIC_ID(apic)); return 0; - } + } for_each_drhd_unit ( drhd ) if ( !ecap_queued_inval(drhd->iommu->ecap) || @@ -833,10 +832,10 @@ struct iommu *iommu; if ( !iommu_supports_eim() ) - return -1; + return -EOPNOTSUPP; if ( !platform_supports_x2apic() ) - return -1; + return -ENXIO; for_each_drhd_unit ( drhd ) { @@ -861,7 +860,7 @@ { dprintk(XENLOG_INFO VTDPREFIX, "Failed to enable Queued Invalidation!\n"); - return -1; + return -EIO; } } @@ -873,7 +872,7 @@ { dprintk(XENLOG_INFO VTDPREFIX, "Failed to enable Interrupt Remapping!\n"); - return -1; + return -EIO; } } diff -Nru xen-4.6.0/xen/drivers/passthrough/vtd/iommu.c xen-4.6.5/xen/drivers/passthrough/vtd/iommu.c --- xen-4.6.0/xen/drivers/passthrough/vtd/iommu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/vtd/iommu.c 2017-03-07 16:19:05.000000000 +0000 @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -253,7 +252,7 @@ { struct acpi_drhd_unit *drhd; struct pci_dev *pdev; - struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct domain_iommu *hd = dom_iommu(domain); int addr_width = agaw_to_width(hd->arch.agaw); struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(hd->arch.agaw); @@ -414,7 +413,7 @@ { struct iommu *iommu = (struct iommu *) _iommu; int tlb_offset = ecap_iotlb_offset(iommu->ecap); - u64 val = 0, val_iva = 0; + u64 val = 0; unsigned long flags; /* @@ -435,7 +434,6 @@ switch ( type ) { case DMA_TLB_GLOBAL_FLUSH: - /* global flush doesn't need set IVA_REG */ val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; break; case DMA_TLB_DSI_FLUSH: @@ -443,8 +441,6 @@ break; case DMA_TLB_PSI_FLUSH: val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); - /* Note: always flush non-leaf currently */ - val_iva = size_order | addr; break; default: BUG(); @@ -457,8 +453,11 @@ spin_lock_irqsave(&iommu->register_lock, flags); /* Note: Only uses first TLB reg currently */ - if ( val_iva ) - dmar_writeq(iommu->reg, tlb_offset, val_iva); + if ( type == DMA_TLB_PSI_FLUSH ) + { + /* Note: always flush non-leaf currently. */ + dmar_writeq(iommu->reg, tlb_offset, size_order | addr); + } dmar_writeq(iommu->reg, tlb_offset + 8, val); /* Make sure hardware complete it */ @@ -561,7 +560,7 @@ static void __intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, int dma_old_pte_present, unsigned int page_count) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); struct acpi_drhd_unit *drhd; struct iommu *iommu; int flush_dev_iotlb; @@ -612,7 +611,7 @@ /* clear one page's page table */ static void dma_pte_clear_one(struct domain *domain, u64 addr) { - struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct domain_iommu *hd = dom_iommu(domain); struct dma_pte *page = NULL, *pte = NULL; u64 pg_maddr; @@ -713,20 +712,18 @@ { if ( force_iommu ) panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose"); - else - { - dprintk(XENLOG_WARNING VTDPREFIX, - "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n"); - return; - } + + printk(XENLOG_WARNING VTDPREFIX + "BIOS did not enable IGD for VT properly. 
Disabling IGD VT-d engine.\n"); + return; } /* apply platform specific errata workarounds */ vtd_ops_preamble_quirk(iommu); if ( iommu_verbose ) - dprintk(VTDPREFIX, - "iommu_enable_translation: iommu->reg = %p\n", iommu->reg); + printk(VTDPREFIX "iommu_enable_translation: iommu->reg = %p\n", + iommu->reg); spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_TE); @@ -1049,11 +1046,11 @@ return; } - msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg); - /* Are these overrides really needed? */ + msi_compose_msg(desc->arch.vector, NULL, &msg); + msg.dest32 = dest; if (x2apic_enabled) msg.address_hi = dest & 0xFFFFFF00; - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + ASSERT(!(msg.address_lo & MSI_ADDR_DEST_ID_MASK)); msg.address_lo |= MSI_ADDR_DEST_ID(dest); iommu->msi.msg = msg; @@ -1150,11 +1147,10 @@ if ( iommu_verbose ) { - dprintk(VTDPREFIX, - "drhd->address = %"PRIx64" iommu->reg = %p\n", - drhd->address, iommu->reg); - dprintk(VTDPREFIX, - "cap = %"PRIx64" ecap = %"PRIx64"\n", iommu->cap, iommu->ecap); + printk(VTDPREFIX "drhd->address = %"PRIx64" iommu->reg = %p\n", + drhd->address, iommu->reg); + printk(VTDPREFIX "cap = %"PRIx64" ecap = %"PRIx64"\n", + iommu->cap, iommu->ecap); } if ( !(iommu->cap + 1) || !(iommu->ecap + 1) ) return -ENODEV; @@ -1163,7 +1159,7 @@ cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) { - dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: unsupported\n"); + printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n"); print_iommu_regs(drhd); return -ENODEV; } @@ -1175,8 +1171,7 @@ break; if ( agaw < 0 ) { - dprintk(XENLOG_ERR VTDPREFIX, - "IOMMU: unsupported sagaw %lx\n", sagaw); + printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported sagaw %lx\n", sagaw); print_iommu_regs(drhd); return -ENODEV; } @@ -1244,9 +1239,7 @@ static int intel_iommu_domain_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); - - hd->arch.agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); + dom_iommu(d)->arch.agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); return 0; } @@ -1280,7 +1273,7 @@ struct iommu *iommu, u8 bus, u8 devfn, const struct pci_dev *pdev) { - struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct domain_iommu *hd = dom_iommu(domain); struct context_entry *context, *context_entries; u64 maddr, pgd_maddr; u16 seg = iommu->intel->drhd->segment; @@ -1433,10 +1426,10 @@ switch ( pdev->type ) { case DEV_TYPE_PCI_HOST_BRIDGE: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:Hostbridge: skip %04x:%02x:%02x.%u map\n", - domain->domain_id, seg, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u map\n", + domain->domain_id, seg, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); if ( !is_hardware_domain(domain) ) return -EPERM; break; @@ -1447,10 +1440,10 @@ break; case DEV_TYPE_PCIe_ENDPOINT: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:PCIe: map %04x:%02x:%02x.%u\n", - domain->domain_id, seg, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:PCIe: map %04x:%02x:%02x.%u\n", + domain->domain_id, seg, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, pdev); if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) @@ -1459,10 +1452,10 @@ break; case DEV_TYPE_PCI: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:PCI: map %04x:%02x:%02x.%u\n", - 
domain->domain_id, seg, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:PCI: map %04x:%02x:%02x.%u\n", + domain->domain_id, seg, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, pdev); @@ -1572,10 +1565,10 @@ switch ( pdev->type ) { case DEV_TYPE_PCI_HOST_BRIDGE: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:Hostbridge: skip %04x:%02x:%02x.%u unmap\n", - domain->domain_id, seg, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u unmap\n", + domain->domain_id, seg, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); if ( !is_hardware_domain(domain) ) return -EPERM; goto out; @@ -1586,10 +1579,10 @@ goto out; case DEV_TYPE_PCIe_ENDPOINT: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:PCIe: unmap %04x:%02x:%02x.%u\n", - domain->domain_id, seg, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:PCIe: unmap %04x:%02x:%02x.%u\n", + domain->domain_id, seg, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_unmap_one(domain, iommu, bus, devfn); if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) disable_ats_device(seg, bus, devfn); @@ -1597,9 +1590,9 @@ break; case DEV_TYPE_PCI: - if ( iommu_verbose ) - dprintk(VTDPREFIX, "d%d:PCI: unmap %04x:%02x:%02x.%u\n", - domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( iommu_debug ) + printk(VTDPREFIX "d%d:PCI: unmap %04x:%02x:%02x.%u\n", + domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_unmap_one(domain, iommu, bus, devfn); if ( ret ) break; @@ -1650,10 +1643,9 @@ if ( found == 0 ) { - struct hvm_iommu *hd = domain_hvm_iommu(domain); int iommu_domid; - clear_bit(iommu->index, &hd->arch.iommu_bitmap); + clear_bit(iommu->index, &dom_iommu(domain)->arch.iommu_bitmap); iommu_domid = domain_iommu_domid(domain, iommu); if ( iommu_domid == -1 ) @@ -1672,7 +1664,7 @@ static void iommu_domain_teardown(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); struct mapped_rmrr *mrmrr, *tmp; if ( list_empty(&acpi_drhd_units) ) @@ -1697,7 +1689,7 @@ struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 }; u64 pg_maddr; @@ -1763,7 +1755,7 @@ { struct acpi_drhd_unit *drhd; struct iommu *iommu = NULL; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); int flush_dev_iotlb; int iommu_domid; @@ -1804,11 +1796,11 @@ */ static void iommu_set_pgd(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); mfn_t pgd_mfn; pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))); - hd->arch.pgd_maddr = pagetable_get_paddr(pagetable_from_mfn(pgd_mfn)); + dom_iommu(d)->arch.pgd_maddr = + pagetable_get_paddr(pagetable_from_mfn(pgd_mfn)); } static int rmrr_identity_mapping(struct domain *d, bool_t map, @@ -1818,7 +1810,7 @@ unsigned long base_pfn = rmrr->base_address >> PAGE_SHIFT_4K; unsigned long end_pfn = PAGE_ALIGN_4K(rmrr->end_address) >> PAGE_SHIFT_4K; struct mapped_rmrr *mrmrr; - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); ASSERT(spin_is_locked(&pcidevs_lock)); ASSERT(rmrr->base_address < rmrr->end_address); @@ -2508,12 +2500,12 @@ static void vtd_dump_p2m_table(struct domain *d) { - 
struct hvm_iommu *hd; + const struct domain_iommu *hd; if ( list_empty(&acpi_drhd_units) ) return; - hd = domain_hvm_iommu(d); + hd = dom_iommu(d); printk("p2m table has %d levels\n", agaw_to_level(hd->arch.agaw)); vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0); } diff -Nru xen-4.6.0/xen/drivers/passthrough/vtd/quirks.c xen-4.6.5/xen/drivers/passthrough/vtd/quirks.c --- xen-4.6.0/xen/drivers/passthrough/vtd/quirks.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/vtd/quirks.c 2017-03-07 16:19:05.000000000 +0000 @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -432,7 +431,6 @@ * - This can cause system failure upon non-fatal VT-d faults. * - Potential security issue if malicious guest trigger VT-d faults. */ - case 0x0e28: /* Xeon-E5v2 (IvyBridge) */ case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */ case 0x3728: /* Xeon C5500/C3500 (JasperForest) */ case 0x3c28: /* Sandybridge */ diff -Nru xen-4.6.0/xen/drivers/passthrough/x86/iommu.c xen-4.6.5/xen/drivers/passthrough/x86/iommu.c --- xen-4.6.0/xen/drivers/passthrough/x86/iommu.c 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/drivers/passthrough/x86/iommu.c 2017-03-07 16:19:05.000000000 +0000 @@ -41,7 +41,7 @@ int arch_iommu_populate_page_table(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct page_info *page; int rc = 0, n = 0; @@ -119,7 +119,7 @@ int arch_iommu_domain_init(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + struct domain_iommu *hd = dom_iommu(d); spin_lock_init(&hd->arch.mapping_lock); INIT_LIST_HEAD(&hd->arch.g2m_ioport_list); @@ -130,7 +130,7 @@ void arch_iommu_domain_destroy(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); + const struct domain_iommu *hd = dom_iommu(d); struct list_head *ioport_list, *tmp; struct g2m_ioport *ioport; diff -Nru xen-4.6.0/xen/include/asm-arm/arm32/processor.h xen-4.6.5/xen/include/asm-arm/arm32/processor.h --- xen-4.6.0/xen/include/asm-arm/arm32/processor.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/arm32/processor.h 2017-03-07 16:19:05.000000000 +0000 @@ -55,6 +55,17 @@ uint32_t pad1; /* Doubleword-align the user half of the frame */ }; + +/* Functions for pending virtual abort checking window. 
*/ +void abort_guest_exit_start(void); +void abort_guest_exit_end(void); + +#define VABORT_GEN_BY_GUEST(r) \ +( \ + ( (unsigned long)abort_guest_exit_start == (r)->pc ) || \ + ( (unsigned long)abort_guest_exit_end == (r)->pc ) \ +) + #endif /* Layout as used in assembly, with src/dest registers mixed in */ diff -Nru xen-4.6.0/xen/include/asm-arm/arm32/system.h xen-4.6.5/xen/include/asm-arm/arm32/system.h --- xen-4.6.0/xen/include/asm-arm/arm32/system.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/arm32/system.h 2017-03-07 16:19:05.000000000 +0000 @@ -24,7 +24,7 @@ asm volatile ( \ "msr cpsr_c, %0 @ local_irq_restore\n" \ : \ - : "r" (flags) \ + : "r" (x) \ : "memory", "cc"); \ }) diff -Nru xen-4.6.0/xen/include/asm-arm/arm64/system.h xen-4.6.5/xen/include/asm-arm/arm64/system.h --- xen-4.6.0/xen/include/asm-arm/arm64/system.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/arm64/system.h 2017-03-07 16:19:05.000000000 +0000 @@ -40,7 +40,7 @@ asm volatile ( \ "msr daif, %0 // local_irq_restore" \ : \ - : "r" (flags) \ + : "r" (x) \ : "memory"); \ }) diff -Nru xen-4.6.0/xen/include/asm-arm/config.h xen-4.6.5/xen/include/asm-arm/config.h --- xen-4.6.0/xen/include/asm-arm/config.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/config.h 2017-03-07 16:19:05.000000000 +0000 @@ -39,6 +39,10 @@ #define CONFIG_IRQ_HAS_MULTIPLE_ACTION 1 +#define CONFIG_PAGEALLOC_MAX_ORDER 18 +#define CONFIG_DOMU_MAX_ORDER 9 +#define CONFIG_HWDOM_MAX_ORDER 10 + #define OPT_CONSOLE_STR "dtuart" #ifdef MAX_PHYS_CPUS diff -Nru xen-4.6.0/xen/include/asm-arm/domain.h xen-4.6.5/xen/include/asm-arm/domain.h --- xen-4.6.0/xen/include/asm-arm/domain.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/domain.h 2017-03-07 16:19:05.000000000 +0000 @@ -11,12 +11,10 @@ #include #include #include -#include struct hvm_domain { uint64_t params[HVM_NR_PARAMS]; - struct hvm_iommu iommu; bool_t introspection_enabled; } __cacheline_aligned; diff -Nru xen-4.6.0/xen/include/asm-arm/gic.h xen-4.6.5/xen/include/asm-arm/gic.h --- xen-4.6.0/xen/include/asm-arm/gic.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/gic.h 2017-03-07 16:19:05.000000000 +0000 @@ -42,6 +42,8 @@ #define GICD_IPRIORITYR (0x400) #define GICD_IPRIORITYRN (0x7F8) #define GICD_ITARGETSR (0x800) +#define GICD_ITARGETSR7 (0x81C) +#define GICD_ITARGETSR8 (0x820) #define GICD_ITARGETSRN (0xBF8) #define GICD_ICFGR (0xC00) #define GICD_ICFGRN (0xCFC) diff -Nru xen-4.6.0/xen/include/asm-arm/hvm/iommu.h xen-4.6.5/xen/include/asm-arm/hvm/iommu.h --- xen-4.6.0/xen/include/asm-arm/hvm/iommu.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/hvm/iommu.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -#ifndef __ASM_ARM_HVM_IOMMU_H_ -#define __ASM_ARM_HVM_IOMMU_H_ - -struct arch_hvm_iommu -{ - /* Private information for the IOMMU drivers */ - void *priv; -}; - -#endif /* __ASM_ARM_HVM_IOMMU_H_ */ diff -Nru xen-4.6.0/xen/include/asm-arm/iocap.h xen-4.6.5/xen/include/asm-arm/iocap.h --- xen-4.6.0/xen/include/asm-arm/iocap.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/iocap.h 2017-03-07 16:19:05.000000000 +0000 @@ -4,10 +4,6 @@ #define cache_flush_permitted(d) \ (!rangeset_is_empty((d)->iomem_caps)) -#define multipage_allocation_permitted(d, order) \ - (((order) <= 9) || /* allow 2MB superpages */ \ - !rangeset_is_empty((d)->iomem_caps)) - #endif /* diff -Nru xen-4.6.0/xen/include/asm-arm/iommu.h 
xen-4.6.5/xen/include/asm-arm/iommu.h --- xen-4.6.0/xen/include/asm-arm/iommu.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/iommu.h 2017-03-07 16:19:05.000000000 +0000 @@ -14,9 +14,14 @@ #ifndef __ARCH_ARM_IOMMU_H__ #define __ARCH_ARM_IOMMU_H__ +struct arch_iommu +{ + /* Private information for the IOMMU drivers */ + void *priv; +}; + /* Always share P2M Table between the CPU and the IOMMU */ #define iommu_use_hap_pt(d) (1) -#define domain_hvm_iommu(d) (&d->arch.hvm_domain.iommu) const struct iommu_ops *iommu_get_ops(void); void __init iommu_set_ops(const struct iommu_ops *ops); diff -Nru xen-4.6.0/xen/include/asm-arm/mm.h xen-4.6.5/xen/include/asm-arm/mm.h --- xen-4.6.0/xen/include/asm-arm/mm.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/mm.h 2017-03-07 16:19:05.000000000 +0000 @@ -275,7 +275,7 @@ return mfn_to_virt(page_to_mfn(pg)); } -struct page_info *get_page_from_gva(struct domain *d, vaddr_t va, +struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va, unsigned long flags); /* diff -Nru xen-4.6.0/xen/include/asm-arm/numa.h xen-4.6.5/xen/include/asm-arm/numa.h --- xen-4.6.0/xen/include/asm-arm/numa.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/numa.h 2017-03-07 16:19:05.000000000 +0000 @@ -17,6 +17,11 @@ #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) #define __node_distance(a, b) (20) +static inline unsigned int arch_get_dma_bitsize(void) +{ + return 32; +} + #endif /* __ARCH_ARM_NUMA_H */ /* * Local variables: diff -Nru xen-4.6.0/xen/include/asm-arm/processor.h xen-4.6.5/xen/include/asm-arm/processor.h --- xen-4.6.0/xen/include/asm-arm/processor.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/processor.h 2017-03-07 16:19:05.000000000 +0000 @@ -565,6 +565,13 @@ #define FSC_LL_MASK (_AC(0x03,U)<<0) +/* HPFAR_EL2: Hypervisor IPA Fault Address Register */ +#ifdef CONFIG_ARM_64 +#define HPFAR_MASK GENMASK(39, 4) +#else +#define HPFAR_MASK GENMASK(31, 4) +#endif + /* Time counter hypervisor control register */ #define CNTHCTL_EL2_EL1PCTEN (1u<<0) /* Kernel/user access to physical counter */ #define CNTHCTL_EL2_EL1PCEN (1u<<1) /* Kernel/user access to CNTP timer regs */ @@ -635,6 +642,8 @@ int call_smc(register_t function_id, register_t arg0, register_t arg1, register_t arg2); +void do_trap_guest_error(struct cpu_user_regs *regs); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARM_PROCESSOR_H */ /* diff -Nru xen-4.6.0/xen/include/asm-arm/spinlock.h xen-4.6.5/xen/include/asm-arm/spinlock.h --- xen-4.6.0/xen/include/asm-arm/spinlock.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/spinlock.h 2017-03-07 16:19:05.000000000 +0000 @@ -1,6 +1,13 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H -/* Nothing ARM specific. 
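The asm-arm/iommu.h hunk above is one end of a refactoring that runs through most of the passthrough hunks in this diff: struct hvm_iommu, formerly embedded in struct hvm_domain, becomes a generic struct domain_iommu wrapping a per-architecture struct arch_iommu, and the domain_hvm_iommu() accessor becomes dom_iommu(). This decouples IOMMU state from HVM-specific structures (PV domains need IOMMU state too) and lets read-only users hold a const struct domain_iommu *, as in the deassign_device() hunk earlier. A sketch of the assumed resulting shape; the real definitions live in xen/include/xen/iommu.h, which is not part of this excerpt:

    struct domain_iommu {
        struct arch_iommu arch;      /* x86: pgd_maddr etc.; ARM: void *priv */
        /* ... driver-agnostic common fields ... */
    };

    #define dom_iommu(d) (&(d)->iommu)   /* assumed accessor shape */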
*/ +#define arch_lock_acquire_barrier() smp_mb() +#define arch_lock_release_barrier() smp_mb() + +#define arch_lock_relax() wfe() +#define arch_lock_signal() do { \ + dsb(ishst); \ + sev(); \ +} while(0) #endif /* __ASM_SPINLOCK_H */ diff -Nru xen-4.6.0/xen/include/asm-arm/system.h xen-4.6.5/xen/include/asm-arm/system.h --- xen-4.6.0/xen/include/asm-arm/system.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-arm/system.h 2017-03-07 16:19:05.000000000 +0000 @@ -53,9 +53,6 @@ #define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v) -#define arch_lock_acquire_barrier() smp_mb() -#define arch_lock_release_barrier() smp_mb() - extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next); #endif diff -Nru xen-4.6.0/xen/include/asm-x86/asm_defns.h xen-4.6.5/xen/include/asm-x86/asm_defns.h --- xen-4.6.0/xen/include/asm-x86/asm_defns.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/asm_defns.h 2017-03-07 16:19:05.000000000 +0000 @@ -176,11 +176,23 @@ 662: __ASM_##op; \ .popsection; \ .pushsection .altinstructions, "a"; \ + altinstruction_entry 661b, 661b, X86_FEATURE_ALWAYS, 3, 0; \ altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3; \ .popsection #define ASM_STAC ASM_AC(STAC) #define ASM_CLAC ASM_AC(CLAC) + +#define CR4_PV32_RESTORE \ + 667: ASM_NOP5; \ + .pushsection .altinstr_replacement, "ax"; \ + 668: call cr4_pv32_restore; \ + .section .altinstructions, "a"; \ + altinstruction_entry 667b, 667b, X86_FEATURE_ALWAYS, 5, 0; \ + altinstruction_entry 667b, 668b, X86_FEATURE_SMEP, 5, 5; \ + altinstruction_entry 667b, 668b, X86_FEATURE_SMAP, 5, 5; \ + .popsection + #else static always_inline void clac(void) { @@ -280,14 +292,18 @@ * * For the way it is used in RESTORE_ALL, this macro must preserve EFLAGS.ZF. */ -.macro LOAD_C_CLOBBERED compat=0 +.macro LOAD_C_CLOBBERED compat=0 ax=1 .if !\compat movq UREGS_r11(%rsp),%r11 movq UREGS_r10(%rsp),%r10 movq UREGS_r9(%rsp),%r9 movq UREGS_r8(%rsp),%r8 -.endif +.if \ax movq UREGS_rax(%rsp),%rax +.endif +.elseif \ax + movl UREGS_rax(%rsp),%eax +.endif movq UREGS_rcx(%rsp),%rcx movq UREGS_rdx(%rsp),%rdx movq UREGS_rsi(%rsp),%rsi diff -Nru xen-4.6.0/xen/include/asm-x86/config.h xen-4.6.5/xen/include/asm-x86/config.h --- xen-4.6.0/xen/include/asm-x86/config.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/config.h 2017-03-07 16:19:05.000000000 +0000 @@ -28,9 +28,12 @@ #define CONFIG_NUMA 1 #define CONFIG_DISCONTIGMEM 1 #define CONFIG_NUMA_EMU 1 -#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER) #define CONFIG_DOMAIN_PAGE 1 +#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER) +#define CONFIG_DOMU_MAX_ORDER PAGETABLE_ORDER +#define CONFIG_HWDOM_MAX_ORDER 12 + /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). 
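The asm-arm/spinlock.h hunk above gives the generic ticket-lock code real wait/wake hooks: arch_lock_relax() executes WFE so a spinning CPU dozes until an event arrives, and arch_lock_signal() issues DSB(ishst) before SEV so the unlocking store is visible to every waiter it wakes. (Contrast the x86 versions later in this diff, where TSO ordering lets plain barrier()/cpu_relax() suffice.) A hedged sketch of how the common layer is expected to use the pair — the loop is illustrative rather than the actual common/spinlock.c code, though observe_head() and add_sized() are the names used by the barrier comment being moved in the x86 hunks:

    while ( observe_head(&lock->tickets) != my_ticket )
        arch_lock_relax();             /* ARM: wfe -- sleep until an event */

    /* ... critical section ... */

    add_sized(&lock->tickets.head, 1); /* release the lock */
    arch_lock_signal();                /* ARM: dsb(ishst); sev() -- wake waiters */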
*/ #define CONFIG_X86_L1_CACHE_SHIFT 7 diff -Nru xen-4.6.0/xen/include/asm-x86/cpufeature.h xen-4.6.5/xen/include/asm-x86/cpufeature.h --- xen-4.6.0/xen/include/asm-x86/cpufeature.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/cpufeature.h 2017-03-07 16:19:05.000000000 +0000 @@ -135,6 +135,7 @@ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_DBEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_MWAITX (6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */ #define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */ @@ -154,6 +155,9 @@ #define X86_FEATURE_ADX (7*32+19) /* ADCX, ADOX instructions */ #define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention */ +/* An alias of a feature we know is always going to be present. */ +#define X86_FEATURE_ALWAYS X86_FEATURE_LM + #if !defined(__ASSEMBLY__) && !defined(X86_FEATURES_ONLY) #include diff -Nru xen-4.6.0/xen/include/asm-x86/current.h xen-4.6.5/xen/include/asm-x86/current.h --- xen-4.6.0/xen/include/asm-x86/current.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/current.h 2017-03-07 16:19:05.000000000 +0000 @@ -41,8 +41,8 @@ unsigned int processor_id; struct vcpu *current_vcpu; unsigned long per_cpu_offset; + unsigned long cr4; /* get_stack_bottom() must be 16-byte aligned */ - unsigned long __pad_for_stack_bottom; }; static inline struct cpu_info *get_cpu_info(void) diff -Nru xen-4.6.0/xen/include/asm-x86/domain.h xen-4.6.5/xen/include/asm-x86/domain.h --- xen-4.6.0/xen/include/asm-x86/domain.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/domain.h 2017-03-07 16:19:05.000000000 +0000 @@ -193,6 +193,9 @@ /* log dirty support */ struct log_dirty_domain log_dirty; + /* Number of valid bits in a gfn. */ + unsigned int gfn_bits; + /* preemption handling */ struct { const struct domain *dom; @@ -247,6 +250,8 @@ { l1_pgentry_t **gdt_ldt_l1tab; + atomic_t nr_l4_pages; + /* map_domain_page() mapping cache. */ struct mapcache_domain mapcache; }; @@ -336,6 +341,21 @@ u8 x86_vendor; /* CPU vendor */ u8 x86_model; /* CPU model */ + /* + * The width of the FIP/FDP register in the FPU that needs to be + * saved/restored during a context switch. This is needed because + * the FPU can either: a) restore the 64-bit FIP/FDP and clear FCS + * and FDS; or b) restore the 32-bit FIP/FDP (clearing the upper + * 32-bits of FIP/FDP) and restore FCS/FDS. + * + * Which one is needed depends on the guest. + * + * This can be either: 8, 4 or 0. 0 means auto-detect the size + * based on the width of FIP/FDP values that are written by the + * guest. + */ + uint8_t x87_fip_width; + cpuid_input_t *cpuids; struct PITState vpit; diff -Nru xen-4.6.0/xen/include/asm-x86/flushtlb.h xen-4.6.5/xen/include/asm-x86/flushtlb.h --- xen-4.6.0/xen/include/asm-x86/flushtlb.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/flushtlb.h 2017-03-07 16:19:05.000000000 +0000 @@ -85,6 +85,8 @@ #define FLUSH_TLB_GLOBAL 0x200 /* Flush data caches */ #define FLUSH_CACHE 0x400 + /* VA for the flush has a valid mapping */ +#define FLUSH_VA_VALID 0x800 /* Flush local TLBs/caches. 
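Two x86 changes meet here: the asm_defns.h hunk earlier adds an extra altinstruction_entry tagged X86_FEATURE_ALWAYS to the SMAP/SMEP patch sites, and cpufeature.h now defines that pseudo-feature as an alias of X86_FEATURE_LM, which is set on every CPU Xen can boot on. As far as this excerpt shows, the point is to guarantee the alternatives patcher always visits these sites: when no SMEP/SMAP replacement applies, the "always" entry still lets the padding be rewritten with optimal NOPs. Conceptually (all names below are illustrative, not the actual patcher code):

    if ( boot_cpu_has(X86_FEATURE_SMAP) )
        patch_site(site, stac_clac_bytes);   /* 3-byte STAC/CLAC sequence */
    else
        patch_site(site, ideal_nops);        /* X86_FEATURE_ALWAYS branch */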
*/ void flush_area_local(const void *va, unsigned int flags); diff -Nru xen-4.6.0/xen/include/asm-x86/guest_pt.h xen-4.6.5/xen/include/asm-x86/guest_pt.h --- xen-4.6.0/xen/include/asm-x86/guest_pt.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/guest_pt.h 2017-03-07 16:19:05.000000000 +0000 @@ -210,15 +210,17 @@ } -/* Some bits are invalid in any pagetable entry. */ -#if GUEST_PAGING_LEVELS == 2 -#define _PAGE_INVALID_BITS (0) -#elif GUEST_PAGING_LEVELS == 3 +/* + * Some bits are invalid in any pagetable entry. + * Normal flags values get represented in 24-bit values (see + * get_pte_flags() and put_pte_flags()), so set bit 24 in + * addition to be able to flag out of range frame numbers. + */ +#if GUEST_PAGING_LEVELS == 3 #define _PAGE_INVALID_BITS \ - get_pte_flags(((1ull<<63) - 1) & ~((1ull< #include #include -#include #include #include #include @@ -123,9 +122,6 @@ spinlock_t uc_lock; bool_t is_in_uc_mode; - /* Pass-through */ - struct hvm_iommu hvm_iommu; - /* hypervisor intercepted msix table */ struct list_head msixtbl_list; spinlock_t msixtbl_list_lock; diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/emulate.h xen-4.6.5/xen/include/asm-x86/hvm/emulate.h --- xen-4.6.0/xen/include/asm-x86/hvm/emulate.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/emulate.h 2017-03-07 16:19:05.000000000 +0000 @@ -13,6 +13,7 @@ #define __ASM_X86_HVM_EMULATE_H__ #include +#include #include #include @@ -57,6 +58,7 @@ struct segment_register *hvmemul_get_seg_reg( enum x86_segment seg, struct hvm_emulate_ctxt *hvmemul_ctxt); +int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla); int hvmemul_do_pio_buffer(uint16_t port, unsigned int size, diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/hvm.h xen-4.6.5/xen/include/asm-x86/hvm/hvm.h --- xen-4.6.0/xen/include/asm-x86/hvm/hvm.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/hvm.h 2017-03-07 16:19:05.000000000 +0000 @@ -367,6 +367,7 @@ unsigned int *ecx, unsigned int *edx); void hvm_migrate_timers(struct vcpu *v); bool_t hvm_io_pending(struct vcpu *v); +bool_t is_ioreq_server_page(struct domain *d, const struct page_info *page); void hvm_do_resume(struct vcpu *v); void hvm_migrate_pirqs(struct vcpu *v); @@ -384,7 +385,10 @@ (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE)) /* These exceptions must always be intercepted. */ -#define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op)) +#define HVM_TRAP_MASK ((1U << TRAP_debug) | \ + (1U << TRAP_invalid_op) | \ + (1U << TRAP_alignment_check) | \ + (1U << TRAP_machine_check)) /* * x86 event types. 
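The hvm.h hunk above widens HVM_TRAP_MASK from {#MC, #UD} to also intercept #DB and #AC unconditionally; this appears to be the XSA-156 hardening (CVE-2015-5307 / CVE-2015-8104), where a guest that points #AC or #DB back at itself could otherwise livelock the host CPU in an exception-delivery loop, and intercepting the exceptions lets the hypervisor break the cycle. With the standard x86 vector numbers assumed by Xen's processor.h (#DB = 1, #UD = 6, #AC = 17, #MC = 18), the new mask works out as:

    (1U << 1) | (1U << 6) | (1U << 17) | (1U << 18)   /* == 0x00060042 */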
This enumeration is valid for: diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/io.h xen-4.6.5/xen/include/asm-x86/hvm/io.h --- xen-4.6.0/xen/include/asm-x86/hvm/io.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/io.h 2017-03-07 16:19:05.000000000 +0000 @@ -128,13 +128,19 @@ void msix_write_completion(struct vcpu *); void msixtbl_init(struct domain *d); +enum stdvga_cache_state { + STDVGA_CACHE_UNINITIALIZED, + STDVGA_CACHE_ENABLED, + STDVGA_CACHE_DISABLED +}; + struct hvm_hw_stdvga { uint8_t sr_index; uint8_t sr[8]; uint8_t gr_index; uint8_t gr[9]; bool_t stdvga; - bool_t cache; + enum stdvga_cache_state cache; uint32_t latch; struct page_info *vram_page[64]; /* shadow of 0xa0000-0xaffff */ spinlock_t lock; diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/iommu.h xen-4.6.5/xen/include/asm-x86/hvm/iommu.h --- xen-4.6.0/xen/include/asm-x86/hvm/iommu.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/iommu.h 2017-03-07 16:19:05.000000000 +0000 @@ -48,7 +48,7 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 -struct arch_hvm_iommu +struct arch_iommu { u64 pgd_maddr; /* io page directory machine address */ spinlock_t mapping_lock; /* io page table lock */ diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/nestedhvm.h xen-4.6.5/xen/include/asm-x86/hvm/nestedhvm.h --- xen-4.6.0/xen/include/asm-x86/hvm/nestedhvm.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/nestedhvm.h 2017-03-07 16:19:05.000000000 +0000 @@ -56,6 +56,10 @@ int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa, bool_t access_r, bool_t access_w, bool_t access_x); +int nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, + unsigned int *page_order, uint8_t *p2m_acc, + bool_t access_r, bool_t access_w, bool_t access_x); + /* IO permission map */ unsigned long *nestedhvm_vcpu_iomap_get(bool_t ioport_80, bool_t ioport_ed); diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/vcpu.h xen-4.6.5/xen/include/asm-x86/hvm/vcpu.h --- xen-4.6.0/xen/include/asm-x86/hvm/vcpu.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/vcpu.h 2017-03-07 16:19:05.000000000 +0000 @@ -85,6 +85,8 @@ bool_t mmio_retry; unsigned long msix_unmask_address; + unsigned long msix_snoop_address; + unsigned long msix_snoop_gpa; const struct g2m_ioport *g2m_ioport; }; diff -Nru xen-4.6.0/xen/include/asm-x86/hvm/vmx/vmcs.h xen-4.6.5/xen/include/asm-x86/hvm/vmx/vmcs.h --- xen-4.6.0/xen/include/asm-x86/hvm/vmx/vmcs.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/hvm/vmx/vmcs.h 2017-03-07 16:19:05.000000000 +0000 @@ -157,6 +157,7 @@ void vmx_vmcs_enter(struct vcpu *v); bool_t __must_check vmx_vmcs_try_enter(struct vcpu *v); void vmx_vmcs_exit(struct vcpu *v); +void vmx_vmcs_reload(struct vcpu *v); #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 #define CPU_BASED_USE_TSC_OFFSETING 0x00000008 @@ -291,6 +292,9 @@ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS) #define cpu_has_vmx_pml \ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) +#define cpu_has_vmx_mpx \ + ((vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) && \ + (vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS)) #define VMCS_RID_TYPE_MASK 0x80000000 @@ -484,6 +488,10 @@ #define VMX_GUEST_MSR 0 #define VMX_HOST_MSR 1 +/* VM Instruction error numbers. 
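In the hvm/io.h hunk above, the stdvga cache flag grows from a bool_t into a three-state enum, and the third state is the point: once the VRAM shadow has been bypassed in a way the latch logic cannot mirror, serving cached contents again would return stale data, so "disabled" must be sticky for the lifetime of the domain. A hedged sketch of the implied state machine (this is not the actual xen/arch/x86/hvm/stdvga.c code):

    static void stdvga_cache_update(struct hvm_hw_stdvga *s, bool_t trackable)
    {
        switch ( s->cache )
        {
        case STDVGA_CACHE_UNINITIALIZED:
            s->cache = STDVGA_CACHE_ENABLED;      /* armed on first access */
            /* fall through */
        case STDVGA_CACHE_ENABLED:
            if ( !trackable )
                s->cache = STDVGA_CACHE_DISABLED; /* one-way transition */
            break;
        case STDVGA_CACHE_DISABLED:
            break;                                /* never re-armed */
        }
    }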
*/ +#define VMX_INSN_INVALID_CONTROL_STATE 7 +#define VMX_INSN_INVALID_HOST_STATE 8 + void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type); void vmx_enable_intercept_for_msr(struct vcpu *v, u32 msr, int type); int vmx_read_guest_msr(u32 msr, u64 *val); diff -Nru xen-4.6.0/xen/include/asm-x86/iocap.h xen-4.6.5/xen/include/asm-x86/iocap.h --- xen-4.6.0/xen/include/asm-x86/iocap.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/iocap.h 2017-03-07 16:19:05.000000000 +0000 @@ -18,9 +18,4 @@ (!rangeset_is_empty((d)->iomem_caps) || \ !rangeset_is_empty((d)->arch.ioport_caps)) -#define multipage_allocation_permitted(d, order) \ - (((order) <= 9) || /* allow 2MB superpages */ \ - !rangeset_is_empty((d)->iomem_caps) || \ - !rangeset_is_empty((d)->arch.ioport_caps)) - #endif /* __X86_IOCAP_H__ */ diff -Nru xen-4.6.0/xen/include/asm-x86/iommu.h xen-4.6.5/xen/include/asm-x86/iommu.h --- xen-4.6.0/xen/include/asm-x86/iommu.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/iommu.h 2017-03-07 16:19:05.000000000 +0000 @@ -14,11 +14,12 @@ #ifndef __ARCH_X86_IOMMU_H__ #define __ARCH_X86_IOMMU_H__ +#include /* For now - should really be merged here. */ + #define MAX_IOMMUS 32 /* Does this domain have a P2M table we can use as its IOMMU pagetable? */ #define iommu_use_hap_pt(d) (hap_enabled(d) && iommu_hap_pt_share) -#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); @@ -27,7 +28,7 @@ /* While VT-d specific, this must get declared in a generic header. */ int adjust_vtd_irq_affinities(void); void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); -int iommu_supports_eim(void); +bool_t iommu_supports_eim(void); int iommu_enable_x2apic_IR(void); void iommu_disable_x2apic_IR(void); diff -Nru xen-4.6.0/xen/include/asm-x86/mm.h xen-4.6.5/xen/include/asm-x86/mm.h --- xen-4.6.0/xen/include/asm-x86/mm.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/mm.h 2017-03-07 16:19:05.000000000 +0000 @@ -7,6 +7,7 @@ #include #include #include +#include /* * Per-page-frame information. 
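The asm-x86/iocap.h hunk above (and its asm-arm twin earlier) deletes multipage_allocation_permitted(), under which any domain holding I/O memory capabilities could demand arbitrarily large contiguous allocations. Its replacement is the static CONFIG_DOMU_MAX_ORDER / CONFIG_HWDOM_MAX_ORDER caps added to the two config.h files in this diff. A sketch of the kind of check the common allocation path can now apply; the helper name and its exact placement are assumptions, not shown in this excerpt:

    static unsigned int domain_max_order(const struct domain *d)
    {
        return is_hardware_domain(d) ? CONFIG_HWDOM_MAX_ORDER
                                     : CONFIG_DOMU_MAX_ORDER;
    }

    /* ... when handling an increase-reservation/exchange request ... */
    if ( extent_order > domain_max_order(d) )
        return -EPERM;   /* cap the order instead of trusting iomem caps */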
@@ -320,7 +321,7 @@ void init_guest_l4_table(l4_pgentry_t[], const struct domain *, bool_t zap_ro_mpt); -void fill_ro_mpt(unsigned long mfn); +bool_t fill_ro_mpt(unsigned long mfn); void zap_ro_mpt(unsigned long mfn); int is_iomem_page(unsigned long mfn); @@ -488,6 +489,22 @@ void memguard_guard_stack(void *p); void memguard_unguard_stack(void *p); +struct mmio_ro_emulate_ctxt { + unsigned long cr2; + unsigned int seg, bdf; +}; + +extern int mmio_ro_emulated_write(enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); +extern int mmcfg_intercept_write(enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); + int ptwr_do_page_fault(struct vcpu *, unsigned long, struct cpu_user_regs *); int mmio_ro_do_page_fault(struct vcpu *, unsigned long, diff -Nru xen-4.6.0/xen/include/asm-x86/mpspec.h xen-4.6.5/xen/include/asm-x86/mpspec.h --- xen-4.6.0/xen/include/asm-x86/mpspec.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/mpspec.h 2017-03-07 16:19:05.000000000 +0000 @@ -43,6 +43,19 @@ #define physid_isset(physid, map) test_bit(physid, (map).mask) #define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask) +#define first_physid(map) find_first_bit((map).mask, \ + MAX_APICS) +#define next_physid(id, map) find_next_bit((map).mask, \ + MAX_APICS, (id) + 1) +#define last_physid(map) ({ \ + const unsigned long *mask = (map).mask; \ + unsigned int id, last = MAX_APICS; \ + for (id = find_first_bit(mask, MAX_APICS); id < MAX_APICS; \ + id = find_next_bit(mask, MAX_APICS, (id) + 1)) \ + last = id; \ + last; \ +}) + #define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) diff -Nru xen-4.6.0/xen/include/asm-x86/msr-index.h xen-4.6.5/xen/include/asm-x86/msr-index.h --- xen-4.6.0/xen/include/asm-x86/msr-index.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/msr-index.h 2017-03-07 16:19:05.000000000 +0000 @@ -56,7 +56,10 @@ #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 -#define MSR_IA32_BNDCFGS 0x00000D90 +#define MSR_IA32_BNDCFGS 0x00000d90 +#define IA32_BNDCFGS_ENABLE 0x00000001 +#define IA32_BNDCFGS_PRESERVE 0x00000002 +#define IA32_BNDCFGS_RESERVED 0x00000ffc #define MSR_MTRRfix64K_00000 0x00000250 #define MSR_MTRRfix16K_80000 0x00000258 @@ -200,10 +203,11 @@ /* AMD64 MSRs */ #define MSR_AMD64_NB_CFG 0xc001001f +#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_IC_CFG 0xc0011021 #define MSR_AMD64_DC_CFG 0xc0011022 -#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46 +#define MSR_AMD64_DE_CFG 0xc0011029 #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027 #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019 diff -Nru xen-4.6.0/xen/include/asm-x86/mtrr.h xen-4.6.5/xen/include/asm-x86/mtrr.h --- xen-4.6.0/xen/include/asm-x86/mtrr.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/mtrr.h 2017-03-07 16:19:05.000000000 +0000 @@ -91,7 +91,7 @@ extern void memory_type_changed(struct domain *); extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr); -bool_t is_var_mtrr_overlapped(struct mtrr_state *m); +bool_t is_var_mtrr_overlapped(const struct mtrr_state *m); bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs); #endif /* __ASM_X86_MTRR_H__ */ diff 
-Nru xen-4.6.0/xen/include/asm-x86/numa.h xen-4.6.5/xen/include/asm-x86/numa.h --- xen-4.6.0/xen/include/asm-x86/numa.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/numa.h 2017-03-07 16:19:05.000000000 +0000 @@ -86,5 +86,6 @@ void srat_parse_regions(u64 addr); extern u8 __node_distance(nodeid_t a, nodeid_t b); +unsigned int arch_get_dma_bitsize(void); #endif diff -Nru xen-4.6.0/xen/include/asm-x86/p2m.h xen-4.6.5/xen/include/asm-x86/p2m.h --- xen-4.6.0/xen/include/asm-x86/p2m.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/p2m.h 2017-03-07 16:19:05.000000000 +0000 @@ -141,6 +141,11 @@ | p2m_to_mask(p2m_ram_logdirty) ) #define P2M_SHARED_TYPES (p2m_to_mask(p2m_ram_shared)) +/* Valid types not necessarily associated with a (valid) MFN. */ +#define P2M_INVALID_MFN_TYPES (P2M_POD_TYPES \ + | p2m_to_mask(p2m_mmio_direct) \ + | P2M_PAGING_TYPES) + /* Broken type: the frame backing this pfn has failed in hardware * and must not be touched. */ #define P2M_BROKEN_TYPES (p2m_to_mask(p2m_ram_broken)) @@ -171,6 +176,8 @@ (P2M_RAM_TYPES | P2M_GRANT_TYPES | \ p2m_to_mask(p2m_map_foreign))) +#define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES) + typedef enum { p2m_host, p2m_nested, @@ -292,10 +299,20 @@ entry_count; /* # of pages in p2m marked pod */ unsigned long reclaim_single; /* Last gpfn of a scan */ unsigned long max_guest; /* gpfn of max guest demand-populate */ -#define POD_HISTORY_MAX 128 - /* gpfn of last guest superpage demand-populated */ - unsigned long last_populated[POD_HISTORY_MAX]; - unsigned int last_populated_index; + + /* + * Tracking of the most recently populated PoD pages, for eager + * reclamation. + */ + struct pod_mrp_list { +#define NR_POD_MRP_ENTRIES 32 + +/* Encode ORDER_2M superpage in top bit of GFN */ +#define POD_LAST_SUPERPAGE (INVALID_GFN & ~(INVALID_GFN >> 1)) + + unsigned long list[NR_POD_MRP_ENTRIES]; + unsigned int idx; + } mrp; mm_lock_t lock; /* Locking of private pod structs, * * not relying on the p2m lock. */ } pod; @@ -578,7 +595,7 @@ /* Move all pages from the populate-on-demand cache to the domain page_list * (usually in preparation for domain destruction) */ -void p2m_pod_empty_cache(struct domain *d); +int p2m_pod_empty_cache(struct domain *d); /* Set populate-on-demand cache size so that the total memory allocated to a * domain matches target */ diff -Nru xen-4.6.0/xen/include/asm-x86/page.h xen-4.6.5/xen/include/asm-x86/page.h --- xen-4.6.0/xen/include/asm-x86/page.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/page.h 2017-03-07 16:19:05.000000000 +0000 @@ -157,6 +157,9 @@ #define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags)) #define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags)) +/* Flip flags in an existing L1 PTE. */ +#define l1e_flip_flags(x, flags) ((x).l1 ^= put_pte_flags(flags)) + /* Check if a pte's page mapping or significant access flags have changed. */ #define l1e_has_changed(x,y,flags) \ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) @@ -308,6 +311,7 @@ #define _PAGE_AVAIL2 _AC(0x800,U) #define _PAGE_AVAIL _AC(0xE00,U) #define _PAGE_PSE_PAT _AC(0x1000,U) +#define _PAGE_AVAIL_HIGH (_AC(0x7ff, U) << 12) #define _PAGE_NX (cpu_has_nx ? 
_PAGE_NX_BIT : 0) /* non-architectural flags */ #define _PAGE_PAGED 0x2000U diff -Nru xen-4.6.0/xen/include/asm-x86/paging.h xen-4.6.5/xen/include/asm-x86/paging.h --- xen-4.6.0/xen/include/asm-x86/paging.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/paging.h 2017-03-07 16:19:05.000000000 +0000 @@ -245,7 +245,9 @@ * or 0 if it's safe not to do so. */ static inline int paging_invlpg(struct vcpu *v, unsigned long va) { - return paging_get_hostmode(v)->invlpg(v, va); + return (paging_mode_external(v->domain) ? is_canonical_address(va) + : __addr_ok(va)) && + paging_get_hostmode(v)->invlpg(v, va); } /* Translate a guest virtual address to the frame number that the diff -Nru xen-4.6.0/xen/include/asm-x86/pci.h xen-4.6.5/xen/include/asm-x86/pci.h --- xen-4.6.0/xen/include/asm-x86/pci.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/pci.h 2017-03-07 16:19:05.000000000 +0000 @@ -23,4 +23,7 @@ bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, unsigned int *bdf); +bool_t pci_ro_mmcfg_decode(unsigned long mfn, unsigned int *seg, + unsigned int *bdf); + #endif /* __X86_PCI_H__ */ diff -Nru xen-4.6.0/xen/include/asm-x86/processor.h xen-4.6.5/xen/include/asm-x86/processor.h --- xen-4.6.0/xen/include/asm-x86/processor.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/processor.h 2017-03-07 16:19:05.000000000 +0000 @@ -135,16 +135,18 @@ #define TF_kernel_mode (1<<_TF_kernel_mode) /* #PF error code values. */ -#define PFEC_page_present (1U<<0) -#define PFEC_write_access (1U<<1) -#define PFEC_user_mode (1U<<2) -#define PFEC_reserved_bit (1U<<3) -#define PFEC_insn_fetch (1U<<4) +#define PFEC_page_present (_AC(1,U) << 0) +#define PFEC_write_access (_AC(1,U) << 1) +#define PFEC_user_mode (_AC(1,U) << 2) +#define PFEC_reserved_bit (_AC(1,U) << 3) +#define PFEC_insn_fetch (_AC(1,U) << 4) #define PFEC_page_paged (1U<<5) #define PFEC_page_shared (1U<<6) #define XEN_MINIMAL_CR4 (X86_CR4_PGE | X86_CR4_PAE) +#define XEN_CR4_PV32_BITS (X86_CR4_SMEP | X86_CR4_SMAP) + #define XEN_SYSCALL_MASK (X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF| \ X86_EFLAGS_NT|X86_EFLAGS_DF|X86_EFLAGS_IF| \ X86_EFLAGS_TF) @@ -212,6 +214,8 @@ /* Maximum width of physical addresses supported by the hardware */ extern unsigned int paddr_bits; +/* Max physical address width supported within HAP guests */ +extern unsigned int hap_paddr_bits; extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[]); @@ -323,16 +327,14 @@ return cr2; } -DECLARE_PER_CPU(unsigned long, cr4); - static inline unsigned long read_cr4(void) { - return this_cpu(cr4); + return get_cpu_info()->cr4; } static inline void write_cr4(unsigned long val) { - this_cpu(cr4) = val; + get_cpu_info()->cr4 = val; asm volatile ( "mov %0,%%cr4" : : "r" (val) ); } diff -Nru xen-4.6.0/xen/include/asm-x86/spinlock.h xen-4.6.5/xen/include/asm-x86/spinlock.h --- xen-4.6.0/xen/include/asm-x86/spinlock.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/spinlock.h 2017-03-07 16:19:05.000000000 +0000 @@ -4,4 +4,18 @@ #define _raw_read_unlock(l) \ asm volatile ( "lock; dec%z0 %0" : "+m" ((l)->lock) :: "memory" ) +/* + * On x86 the only reordering is of reads with older writes. In the + * lock case, the read in observe_head() can only be reordered with + * writes that precede it, and moving a write _into_ a locked section + * is OK. 
In the release case, the write in add_sized() can only be + * reordered with reads that follow it, and hoisting a read _into_ a + * locked region is OK. + */ +#define arch_lock_acquire_barrier() barrier() +#define arch_lock_release_barrier() barrier() + +#define arch_lock_relax() cpu_relax() +#define arch_lock_signal() + #endif /* __ASM_SPINLOCK_H */ diff -Nru xen-4.6.0/xen/include/asm-x86/system.h xen-4.6.5/xen/include/asm-x86/system.h --- xen-4.6.0/xen/include/asm-x86/system.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/system.h 2017-03-07 16:19:05.000000000 +0000 @@ -185,17 +185,6 @@ #define set_mb(var, value) do { xchg(&var, value); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* - * On x86 the only reordering is of reads with older writes. In the - * lock case, the read in observe_head() can only be reordered with - * writes that precede it, and moving a write _into_ a locked section - * is OK. In the release case, the write in add_sized() can only be - * reordered with reads that follow it, and hoisting a read _into_ a - * locked region is OK. - */ -#define arch_lock_acquire_barrier() barrier() -#define arch_lock_release_barrier() barrier() - #define local_irq_disable() asm volatile ( "cli" : : : "memory" ) #define local_irq_enable() asm volatile ( "sti" : : : "memory" ) diff -Nru xen-4.6.0/xen/include/asm-x86/x86_64/page.h xen-4.6.5/xen/include/asm-x86/x86_64/page.h --- xen-4.6.0/xen/include/asm-x86/x86_64/page.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/asm-x86/x86_64/page.h 2017-03-07 16:19:05.000000000 +0000 @@ -141,6 +141,12 @@ #define _PAGE_GNTTAB (1U<<22) /* + * Bit 24 of a 24-bit flag mask! This is not any bit of a real pte, + * and is only used for signalling in variables that contain flags. + */ +#define _PAGE_INVALID_BIT (1U<<24) + +/* * Bit 12 of a 24-bit flag mask. This corresponds to bit 52 of a pte. * This is needed to distinguish between user and kernel PTEs since _PAGE_USER * is asserted for both. diff -Nru xen-4.6.0/xen/include/public/arch-arm.h xen-4.6.5/xen/include/public/arch-arm.h --- xen-4.6.0/xen/include/public/arch-arm.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/public/arch-arm.h 2017-03-07 16:19:05.000000000 +0000 @@ -397,7 +397,7 @@ #define GUEST_GICD_BASE 0x03001000ULL #define GUEST_GICD_SIZE 0x00001000ULL #define GUEST_GICC_BASE 0x03002000ULL -#define GUEST_GICC_SIZE 0x00000100ULL +#define GUEST_GICC_SIZE 0x00002000ULL /* vGIC v3 mappings */ #define GUEST_GICV3_GICD_BASE 0x03001000ULL diff -Nru xen-4.6.0/xen/include/public/grant_table.h xen-4.6.5/xen/include/public/grant_table.h --- xen-4.6.0/xen/include/public/grant_table.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/public/grant_table.h 2017-03-07 16:19:05.000000000 +0000 @@ -43,7 +43,7 @@ * table are identified by grant references. A grant reference is an * integer, which indexes into the grant table. It acts as a * capability which the grantee can use to perform operations on the - * granter’s memory. + * granter's memory. * * This capability-based system allows shared-memory communications * between unprivileged domains. 
A grant reference also encapsulates diff -Nru xen-4.6.0/xen/include/public/hvm/params.h xen-4.6.5/xen/include/public/hvm/params.h --- xen-4.6.0/xen/include/public/hvm/params.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/public/hvm/params.h 2017-03-07 16:19:05.000000000 +0000 @@ -192,6 +192,28 @@ /* Boolean: Enable altp2m */ #define HVM_PARAM_ALTP2M 35 -#define HVM_NR_PARAMS 36 +/* + * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to + * save/restore. This is a workaround for a hardware limitation that + * does not allow the full FIP/FDP and FCS/FDS to be restored. + * + * Valid values are: + * + * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU + * has FPCSDS feature). + * + * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of + * FIP/FDP. + * + * 0: allow hypervisor to choose based on the value of FIP/FDP + * (default if CPU does not have FPCSDS). + * + * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU + * never saves FCS/FDS and this parameter should be left at the + * default of 8. + */ +#define HVM_PARAM_X87_FIP_WIDTH 36 + +#define HVM_NR_PARAMS 37 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff -Nru xen-4.6.0/xen/include/public/io/libxenvchan.h xen-4.6.5/xen/include/public/io/libxenvchan.h --- xen-4.6.0/xen/include/public/io/libxenvchan.h 2015-10-05 14:33:39.000000000 +0000 +++ xen-4.6.5/xen/include/public/io/libxenvchan.h 2017-03-07 16:19:05.000000000 +0000 @@ -10,18 +10,23 @@ * * @section LICENSE * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; If not, see . + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
 *
 * @section DESCRIPTION
 *
diff -Nru xen-4.6.0/xen/include/public/io/usbif.h xen-4.6.5/xen/include/public/io/usbif.h
--- xen-4.6.0/xen/include/public/io/usbif.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/public/io/usbif.h 2017-03-07 16:19:05.000000000 +0000
@@ -170,6 +170,7 @@
 #define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
 
 #define USBIF_MAX_PORTNR 31
+#define USBIF_RING_SIZE 4096
 
 /*
 * RING for transferring urbs.
@@ -225,7 +226,7 @@
 typedef struct usbif_urb_response usbif_urb_response_t;
 DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
 
-#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE)
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
 
 /*
 * RING for notifying connect/disconnect events to frontend
@@ -247,6 +248,6 @@
 typedef struct usbif_conn_response usbif_conn_response_t;
 DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
 
-#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE)
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
 
 #endif /* __XEN_PUBLIC_IO_USBIF_H__ */
diff -Nru xen-4.6.0/xen/include/public/io/vscsiif.h xen-4.6.5/xen/include/public/io/vscsiif.h
--- xen-4.6.0/xen/include/public/io/vscsiif.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/public/io/vscsiif.h 2017-03-07 16:19:05.000000000 +0000
@@ -179,6 +179,7 @@
 */
 #define VSCSIIF_MAX_COMMAND_SIZE 16
 #define VSCSIIF_SENSE_BUFFERSIZE 96
+#define VSCSIIF_PAGE_SIZE 4096
 
 struct scsiif_request_segment {
     grant_ref_t gref;
@@ -187,7 +188,7 @@
 };
 typedef struct scsiif_request_segment vscsiif_segment_t;
 
-#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
+#define VSCSIIF_SG_PER_PAGE (VSCSIIF_PAGE_SIZE / sizeof(struct scsiif_request_segment))
 
 /* Size of one request is 252 bytes */
 struct vscsiif_request {
diff -Nru xen-4.6.0/xen/include/xen/bitops.h xen-4.6.5/xen/include/xen/bitops.h
--- xen-4.6.0/xen/include/xen/bitops.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/bitops.h 2017-03-07 16:19:05.000000000 +0000
@@ -3,6 +3,14 @@
 #include <asm/bitops.h>
 
 /*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK(30, 21) gives us the 32bit vector 0x01fe00000.
+ */
+#define GENMASK(h, l) \
+    (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+/*
 * ffs: find first bit set. This is defined the same way as
 * the libc and compiler builtin ffs routines, therefore
 * differs in spirit from the above ffz (man ffs).
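
The GENMASK addition above is easy to mis-read at the bit boundaries, so a quick sanity check may help. The following stand-alone sketch re-creates the macro in userspace; BITS_PER_LONG and the main() harness are supplied locally and are assumptions of the sketch, not part of the patch. Note that GENMASK(30, 21) actually evaluates to 0x7fe00000 (ten set bits), not the 0x01fe00000 quoted in the comment the hunk imports.

    #include <stdio.h>

    /* Local stand-ins so the macro from the hunk compiles in userspace. */
    #define BITS_PER_LONG (8 * (unsigned int)sizeof(unsigned long))
    #define GENMASK(h, l) \
        (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

    int main(void)
    {
        printf("GENMASK(30, 21) = %#lx\n", GENMASK(30, 21)); /* 0x7fe00000 */
        printf("GENMASK(0, 0)   = %#lx\n", GENMASK(0, 0));   /* 0x1 */
        printf("GENMASK(%u, 0)  = %#lx\n",
               BITS_PER_LONG - 1, GENMASK(BITS_PER_LONG - 1, 0)); /* all ones */
        return 0;
    }
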
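The usbif.h and vscsiif.h hunks above replace PAGE_SIZE with a hard-coded 4096 so that ring and segment geometry is fixed by the ABI rather than by whichever PAGE_SIZE the including environment happens to define. A frontend built against these headers can then pin the derived constant down at compile time. In this sketch the offset/length fields of scsiif_request_segment are an assumption for illustration only, since the hunk context elides the full struct definition.

    #include <stdint.h>

    typedef uint32_t grant_ref_t;

    /* Assumed field layout; the hunk only shows the leading gref. */
    struct scsiif_request_segment {
        grant_ref_t gref;
        uint16_t offset;
        uint16_t length;
    };

    #define VSCSIIF_PAGE_SIZE 4096
    #define VSCSIIF_SG_PER_PAGE \
        (VSCSIIF_PAGE_SIZE / sizeof(struct scsiif_request_segment))

    /* With the assumed 8-byte segment, one 4k page carries 512 segments
     * regardless of the host's real page size; fail the build otherwise. */
    typedef char sg_per_page_check[VSCSIIF_SG_PER_PAGE == 512 ? 1 : -1];
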
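The HVM_PARAM_X87_FIP_WIDTH comment in the hvm/params.h hunk above encodes a small decision procedure, which a toolstack might drive along the following lines. This is a hedged sketch: the FPCSDS probe uses the compiler's cpuid.h helper on the host, and xc_hvm_param_set() is assumed to be the libxc parameter setter of this Xen generation.

    #include <stdint.h>
    #include <cpuid.h>     /* __get_cpuid_count() (GCC/clang) */
    #include <xenctrl.h>   /* xc_interface, xc_hvm_param_set(), assumed */

    #define HVM_PARAM_X87_FIP_WIDTH 36  /* as defined in the hunk above */

    static uint64_t pick_fip_width(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* FPCSDS: CPUID leaf 0x7, subleaf 0x0, EBX bit 13. The CPU never
         * saves FCS/FDS, so the full 64-bit FIP/FDP width is the default. */
        if ( __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) &&
             (ebx & (1u << 13)) )
            return 8;
        return 0;  /* no FPCSDS: let the hypervisor decide per save */
    }

    int set_fip_width(xc_interface *xch, uint32_t domid)
    {
        return xc_hvm_param_set(xch, domid, HVM_PARAM_X87_FIP_WIDTH,
                                pick_fip_width());
    }
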
diff -Nru xen-4.6.0/xen/include/xen/compiler.h xen-4.6.5/xen/include/xen/compiler.h
--- xen-4.6.0/xen/include/xen/compiler.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/compiler.h 2017-03-07 16:19:05.000000000 +0000
@@ -34,6 +34,8 @@
 #define __used_section(s) __used __attribute__((__section__(s)))
 #define __text_section(s) __attribute__((__section__(s)))
 
+#define __aligned(a) __attribute__((__aligned__(a)))
+
 #ifdef INIT_SECTIONS_ONLY
 /*
 * For sources indicated to have only init code, make sure even
diff -Nru xen-4.6.0/xen/include/xen/event.h xen-4.6.5/xen/include/xen/event.h
--- xen-4.6.0/xen/include/xen/event.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/event.h 2017-03-07 16:19:05.000000000 +0000
@@ -136,6 +136,11 @@
     void (*unmask)(struct domain *d, struct evtchn *evtchn);
     bool_t (*is_pending)(struct domain *d, const struct evtchn *evtchn);
     bool_t (*is_masked)(struct domain *d, const struct evtchn *evtchn);
+    /*
+     * Is the port unavailable because it's still being cleaned up
+     * after being closed?
+     */
+    bool_t (*is_busy)(struct domain *d, evtchn_port_t port);
     int (*set_priority)(struct domain *d, struct evtchn *evtchn,
                         unsigned int priority);
     void (*print_state)(struct domain *d, const struct evtchn *evtchn);
@@ -178,6 +183,13 @@
     return d->evtchn_port_ops->is_masked(d, evtchn);
 }
 
+static inline bool_t evtchn_port_is_busy(struct domain *d, evtchn_port_t port)
+{
+    if ( d->evtchn_port_ops->is_busy )
+        return d->evtchn_port_ops->is_busy(d, port);
+    return 0;
+}
+
 static inline int evtchn_port_set_priority(struct domain *d,
                                            struct evtchn *evtchn,
                                            unsigned int priority)
diff -Nru xen-4.6.0/xen/include/xen/hvm/iommu.h xen-4.6.5/xen/include/xen/hvm/iommu.h
--- xen-4.6.0/xen/include/xen/hvm/iommu.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/hvm/iommu.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (C) Allen Kay <allen.m.kay@intel.com>
- */
-
-#ifndef __XEN_HVM_IOMMU_H__
-#define __XEN_HVM_IOMMU_H__
-
-#include <xen/iommu.h>
-#include <xen/list.h>
-#include <asm/hvm/iommu.h>
-
-struct hvm_iommu {
-    struct arch_hvm_iommu arch;
-
-    /* iommu_ops */
-    const struct iommu_ops *platform_ops;
-
-#ifdef HAS_DEVICE_TREE
-    /* List of DT devices assigned to this domain */
-    struct list_head dt_devices;
-#endif
-
-    /* Features supported by the IOMMU */
-    DECLARE_BITMAP(features, IOMMU_FEAT_count);
-};
-
-#define iommu_set_feature(d, f) set_bit((f), domain_hvm_iommu(d)->features)
-#define iommu_clear_feature(d, f) clear_bit((f), domain_hvm_iommu(d)->features)
-
-#endif /* __XEN_HVM_IOMMU_H__ */
diff -Nru xen-4.6.0/xen/include/xen/iommu.h xen-4.6.5/xen/include/xen/iommu.h
--- xen-4.6.0/xen/include/xen/iommu.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/iommu.h 2017-03-07 16:19:05.000000000 +0000
@@ -86,6 +86,24 @@
 
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature);
 
+struct domain_iommu {
+    struct arch_iommu arch;
+
+    /* iommu_ops */
+    const struct iommu_ops *platform_ops;
+
+#ifdef HAS_DEVICE_TREE
+    /* List of DT devices assigned to this domain */
+    struct list_head dt_devices;
+#endif
+
+    /* Features supported by the IOMMU */
+    DECLARE_BITMAP(features, IOMMU_FEAT_count);
+};
+
+#define dom_iommu(d) (&(d)->iommu)
+#define iommu_set_feature(d, f) set_bit(f, dom_iommu(d)->features)
+#define iommu_clear_feature(d, f) clear_bit(f, dom_iommu(d)->features)
 
 #ifdef HAS_PCI
 void pt_pci_init(void);
diff -Nru xen-4.6.0/xen/include/xen/mm.h xen-4.6.5/xen/include/xen/mm.h
--- xen-4.6.0/xen/include/xen/mm.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/mm.h 2017-03-07 16:19:05.000000000 +0000
@@ -159,6 +159,7 @@
     unsigned int read_access:1;
     unsigned int write_access:1;
     unsigned int insn_fetch:1;
+    unsigned int present:1;
     unsigned int gla_valid:1;
     unsigned int kind:2; /* npfec_kind_t */
 };
diff -Nru xen-4.6.0/xen/include/xen/p2m-common.h xen-4.6.5/xen/include/xen/p2m-common.h
--- xen-4.6.0/xen/include/xen/p2m-common.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/p2m-common.h 2017-03-07 16:19:05.000000000 +0000
@@ -15,14 +15,15 @@
 * default.
 */
 typedef enum {
-    p2m_access_rwx = 0, /* The default access type when not used. */
-    p2m_access_wx = 1,
-    p2m_access_rx = 2,
-    p2m_access_x = 3,
-    p2m_access_rw = 4,
-    p2m_access_w = 5,
-    p2m_access_r = 6,
-    p2m_access_n = 7, /* No access allowed. */
+    /* Code uses bottom three bits with bitmask semantics */
+    p2m_access_n = 0, /* No access allowed. */
+    p2m_access_r = 1 << 0,
+    p2m_access_w = 1 << 1,
+    p2m_access_x = 1 << 2,
+    p2m_access_rw = p2m_access_r | p2m_access_w,
+    p2m_access_rx = p2m_access_r | p2m_access_x,
+    p2m_access_wx = p2m_access_w | p2m_access_x,
+    p2m_access_rwx = p2m_access_r | p2m_access_w | p2m_access_x,
     p2m_access_rx2rw = 8, /* Special: page goes from RX to RW on write */
     p2m_access_n2rwx = 9, /* Special: page goes from N to RWX on access, *
diff -Nru xen-4.6.0/xen/include/xen/sched.h xen-4.6.5/xen/include/xen/sched.h
--- xen-4.6.0/xen/include/xen/sched.h 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/include/xen/sched.h 2017-03-07 16:19:05.000000000 +0000
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <xen/iommu.h>
 #include
 #include
 #include
@@ -368,6 +369,8 @@
     int64_t time_offset_seconds;
 
 #ifdef HAS_PASSTHROUGH
+    struct domain_iommu iommu;
+
     /* Does this guest need iommu mappings (-1 meaning "being set up")? */
     s8 need_iommu;
 #endif
diff -Nru xen-4.6.0/xen/Makefile xen-4.6.5/xen/Makefile
--- xen-4.6.0/xen/Makefile 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/Makefile 2017-03-07 16:19:05.000000000 +0000
@@ -2,7 +2,7 @@
 # All other places this is stored (eg. compile.h) should be autogenerated.
 export XEN_VERSION = 4
 export XEN_SUBVERSION = 6
-export XEN_EXTRAVERSION ?= .0$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .5$(XEN_VENDORVERSION)
 export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
 
 -include xen-version
diff -Nru xen-4.6.0/xen/xsm/flask/ss/policydb.c xen-4.6.5/xen/xsm/flask/ss/policydb.c
--- xen-4.6.0/xen/xsm/flask/ss/policydb.c 2015-10-05 14:33:39.000000000 +0000
+++ xen-4.6.5/xen/xsm/flask/ss/policydb.c 2017-03-07 16:19:05.000000000 +0000
@@ -1258,6 +1258,7 @@
     int rc;
     __le32 buf[3];
     u32 len;
+    u32 ver = p->policyvers;
 
     role = xzalloc(struct role_datum);
     if ( !role )
@@ -1266,7 +1267,7 @@
         goto out;
     }
 
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
         rc = next_entry(buf, fp, sizeof(buf[0]) * 3);
     else
         rc = next_entry(buf, fp, sizeof(buf[0]) * 2);
@@ -1276,7 +1277,7 @@
 
     len = le32_to_cpu(buf[0]);
     role->value = le32_to_cpu(buf[1]);
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
         role->bounds = le32_to_cpu(buf[2]);
 
     key = xmalloc_array(char, len + 1);
@@ -1328,6 +1329,7 @@
     int rc;
     __le32 buf[4];
     u32 len;
+    u32 ver = p->policyvers;
 
     typdatum = xzalloc(struct type_datum);
     if ( !typdatum )
@@ -1336,7 +1338,7 @@
         return rc;
     }
 
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
         rc = next_entry(buf, fp, sizeof(buf[0]) * 4);
     else
         rc = next_entry(buf, fp, sizeof(buf[0]) * 3);
@@ -1346,7 +1348,7 @@
 
     len = le32_to_cpu(buf[0]);
     typdatum->value = le32_to_cpu(buf[1]);
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
     {
         u32 prop = le32_to_cpu(buf[2]);
 
@@ -1421,6 +1423,7 @@
     int rc;
     __le32 buf[3];
     u32 len;
+    u32 ver = p->policyvers;
 
     usrdatum = xzalloc(struct user_datum);
     if ( !usrdatum )
@@ -1429,7 +1432,7 @@
         goto out;
     }
 
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
         rc = next_entry(buf, fp, sizeof(buf[0]) * 3);
     else
         rc = next_entry(buf, fp, sizeof(buf[0]) * 2);
@@ -1439,7 +1442,7 @@
 
     len = le32_to_cpu(buf[0]);
     usrdatum->value = le32_to_cpu(buf[1]);
-    if ( p->policyvers >= POLICYDB_VERSION_BOUNDARY )
+    if ( ver >= POLICYDB_VERSION_BOUNDARY )
         usrdatum->bounds = le32_to_cpu(buf[2]);
 
     key = xmalloc_array(char, len + 1);
@@ -1457,7 +1460,7 @@
     if ( rc )
         goto bad;
 
-    if ( p->policyvers >= POLICYDB_VERSION_MLS )
+    if ( ver >= POLICYDB_VERSION_MLS )
     {
         rc = mls_read_range_helper(&usrdatum->range, fp);
         if ( rc )
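
The p2m-common.h renumbering earlier in this patch rewards a short illustration: once r, w and x are individual bits, a permission check reduces to a mask test, and the special rx2rw/n2rwx values (8 and 9) deliberately sit just above the 3-bit mask. This is a minimal sketch of the idea, not code from the patch.

    #include <stdbool.h>

    /* Mirror of the bitmask encoding introduced by the hunk above. */
    typedef enum {
        p2m_access_n   = 0,
        p2m_access_r   = 1 << 0,
        p2m_access_w   = 1 << 1,
        p2m_access_x   = 1 << 2,
        p2m_access_rw  = p2m_access_r | p2m_access_w,
        p2m_access_rx  = p2m_access_r | p2m_access_x,
        p2m_access_wx  = p2m_access_w | p2m_access_x,
        p2m_access_rwx = p2m_access_r | p2m_access_w | p2m_access_x,
    } p2m_access_t;

    /* A write is permitted iff the w bit is set; under the old 0..7
     * numbering this required enumerating every w-containing value. */
    static bool write_allowed(p2m_access_t a)
    {
        return a & p2m_access_w;
    }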