on the same machine, the user should reserve enough EPC (by taking out
total virtual EPC size of all SGX VMs from the physical EPC size) for
host SGX applications so they can run with acceptable performance.
+
+Architectural behavior is to restore all EPC pages to an uninitialized
+state also after a guest reboot. Because this state can be reached only
+through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc``
+provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction
+on all pages in the virtual EPC.
+
+``EREMOVE`` can fail for three reasons. Userspace must pay attention
+to expected failures and handle them as follows:
+
+1. Page removal will always fail when any thread is running in the
+ enclave to which the page belongs. In this case the ioctl will
+ return ``EBUSY`` independent of whether it has successfully removed
+ some pages; userspace can avoid these failures by preventing execution
+ of any vcpu which maps the virtual EPC.
+
+2. Page removal will cause a general protection fault if two calls to
+ ``EREMOVE`` happen concurrently for pages that refer to the same
+ "SECS" metadata pages. This can happen if there are concurrent
+ invocations to ``SGX_IOC_VEPC_REMOVE_ALL``, or if a ``/dev/sgx_vepc``
+ file descriptor in the guest is closed at the same time as
+ ``SGX_IOC_VEPC_REMOVE_ALL``; it will also be reported as ``EBUSY``.
+ This can be avoided in userspace by serializing calls to the ioctl()
+ and to close(), but in general it should not be a problem.
+
+3. Finally, page removal will fail for SECS metadata pages which still
+ have child pages. Child pages can be removed by executing
+ ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors
+ mapped into the guest. This means that the ioctl() must be called
+ twice: an initial set of calls to remove child pages and a subsequent
+ set of calls to remove SECS pages. The second set of calls is only
+ required for those mappings that returned a nonzero value from the
+ first call. It indicates a bug in the kernel or the userspace client
+ if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has
+ a return code other than 0.
return 0;
}
+static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
+{
+ struct sgx_epc_page *entry;
+ unsigned long index;
+ long failures = 0;
+
+ xa_for_each(&vepc->page_array, index, entry) {
+ int ret = sgx_vepc_remove_page(entry);
+ if (ret) {
+ if (ret == SGX_CHILD_PRESENT) {
+ /* The page is a SECS, userspace will retry. */
+ failures++;
+ } else {
+ /*
+ * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
+ * WARN, as userspace can induce said failures by
+ * calling the ioctl concurrently on multiple vEPCs or
+ * while one or more CPUs is running the enclave. Only
+ * a #PF on EREMOVE indicates a kernel/hardware issue.
+ */
+ WARN_ON_ONCE(encls_faulted(ret) &&
+ ENCLS_TRAPNR(ret) != X86_TRAP_GP);
+ return -EBUSY;
+ }
+ }
+ cond_resched();
+ }
+
+ /*
+ * Return the number of SECS pages that failed to be removed, so
+ * userspace knows that it has to retry.
+ */
+ return failures;
+}
+
static int sgx_vepc_release(struct inode *inode, struct file *file)
{
struct sgx_vepc *vepc = file->private_data;
return 0;
}
+static long sgx_vepc_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct sgx_vepc *vepc = file->private_data;
+
+ switch (cmd) {
+ case SGX_IOC_VEPC_REMOVE_ALL:
+ if (arg)
+ return -EINVAL;
+ return sgx_vepc_remove_all(vepc);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
static const struct file_operations sgx_vepc_fops = {
.owner = THIS_MODULE,
.open = sgx_vepc_open,
+ .unlocked_ioctl = sgx_vepc_ioctl,
+ .compat_ioctl = sgx_vepc_ioctl,
.release = sgx_vepc_release,
.mmap = sgx_vepc_mmap,
};