From 8fa4cf76896ff082413d2f9f76d814ddcd486dd3 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 9 Feb 2012 21:19:49 +0100
Subject: [PATCH] pci-assign: Fix cpu_register_io_memory leak for slow mapping

RH-Author: Alex Williamson <alex.williamson@redhat.com>
Message-id: <20120209211349.15739.80667.stgit@virtlab9.virt.bos.redhat.com>
Patchwork-id: 37154
O-Subject: [RHEL6.3 qemu-kvm PATCH v2] pci-assign: Fix cpu_register_io_memory leak for slow mapping
Bugzilla: 738519
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>
RH-Acked-by: Amos Kong <akong@redhat.com>

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=738519
Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=4025427
Upstream: N/A (inadvertently fixed by the memory API conversion upstream)

The slow map path for MMIO BARs (BARs < 4k) registers a new index
every time the BAR is mapped and never unregisters it.  The indexes
are a finite resource, so after quite a lot of hotplugs, we run out.
Thanks to there being no error checking, we ignore this and re-
register the mapping with a bogus value.  This typically results
in a segfault in the slow BAR access functions, where we dereference
and index into a bogus struct.

Switch this to register the IO memory index at initialization,
actually unregister it on free, and only map it when the guest
programs MMIO space.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

v2:
mmio_index -1 initialization now tracks marking a region valid so we'll
never hit free_assigned_device() with mmio_index in an uninitialized state.
Found by Laszlo.

Testing:
Without this patch, hot-plugging and unplugging an Intel ECHI controller
would segfault consistently after 500 cycles.  I've tested the fix through
over 50k cycles and this specific resping for over 1000.

 hw/device-assignment.c |   25 ++++++++++++++++++++++---
 hw/device-assignment.h |    1 +
 2 files changed, 23 insertions(+), 3 deletions(-)

Signed-off-by: Michal Novotny <minovotn@redhat.com>
---
 hw/device-assignment.c |   25 ++++++++++++++++++++++---
 hw/device-assignment.h |    1 +
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 307238c..e0fb27d 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -249,11 +249,9 @@ static void assigned_dev_iomem_map_slow(PCIDevice *pci_dev, int region_num,
     AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
-    int m;
 
     DEBUG("%s", "slow map\n");
-    m = cpu_register_io_memory(slow_bar_read, slow_bar_write, region);
-    cpu_register_physical_memory(e_phys, e_size, m);
+    cpu_register_physical_memory(e_phys, e_size, region->mmio_index);
 
     /* MSI-X MMIO page */
     if ((e_size > 0) &&
@@ -604,6 +602,16 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
                         i, (unsigned long long)cur_region->base_addr,
                         cur_region->size);
                 slow_map = 1;
+
+                pci_dev->v_addrs[i].mmio_index =
+                                 cpu_register_io_memory(slow_bar_read,
+                                                        slow_bar_write,
+                                                        &pci_dev->v_addrs[i]);
+                if (pci_dev->v_addrs[i].mmio_index < 0) {
+                    fprintf(stderr, "%s: Error registering IO memory\n",
+                            __func__);
+                    return -1;
+                }
             }
 
             /* map physical memory */
@@ -619,6 +627,12 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
                 fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
                         "\n", __func__,
                         (uint32_t) (cur_region->base_addr));
+
+                if (slow_map) {
+                    cpu_unregister_io_memory(pci_dev->v_addrs[i].mmio_index);
+                    pci_dev->v_addrs[i].mmio_index = -1;
+                }
+
                 return -1;
             }
 
@@ -767,6 +781,7 @@ again:
         rp->base_addr = start;
         rp->size = size;
         pci_dev->v_addrs[r].region = rp;
+        pci_dev->v_addrs[r].mmio_index = -1;
         DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
               r, rp->size, start, rp->type, rp->resource_fd);
     }
@@ -868,6 +883,10 @@ static void free_assigned_device(AssignedDevice *dev)
                     }
                 }
 
+                if (region->mmio_index >= 0) {
+                    cpu_unregister_io_memory(region->mmio_index);
+                }
+
                 if (region->u.r_virtbase) {
                     int ret = munmap(region->u.r_virtbase,
                                      (pci_region->size + 0xFFF) & 0xFFFFF000);
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index bc48d54..be4b1b1 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -68,6 +68,7 @@ typedef struct {
         uint32_t r_baseport; /* the base guest port for I/O regions */
     } u;
     int num;            /* our index within v_addrs[] */
+    int mmio_index;     /* cpu_register_io_memory (slow mapped BARs) */
     pcibus_t e_size;    /* emulated size of region in bytes */
     pcibus_t r_size;    /* real size of region in bytes */
     PCIRegion *region;
-- 
1.7.7.6

