--- /dev/null
-#include "hw/timer/mc146818rtc.h"
+ /*
+ * Copyright (c) 2018 Intel Corporation
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ #include "qemu/osdep.h"
+ #include "qemu/error-report.h"
+ #include "qemu/cutils.h"
+ #include "qemu/units.h"
+ #include "qapi/error.h"
+ #include "qapi/visitor.h"
+ #include "qapi/qapi-visit-common.h"
+ #include "sysemu/sysemu.h"
+ #include "sysemu/cpus.h"
+ #include "sysemu/numa.h"
+ #include "sysemu/reset.h"
+
+ #include "hw/loader.h"
+ #include "hw/irq.h"
+ #include "hw/kvm/clock.h"
+ #include "hw/i386/microvm.h"
+ #include "hw/i386/x86.h"
+ #include "hw/i386/pc.h"
+ #include "target/i386/cpu.h"
+ #include "hw/timer/i8254.h"
++#include "hw/rtc/mc146818rtc.h"
+ #include "hw/char/serial.h"
+ #include "hw/i386/topology.h"
+ #include "hw/i386/e820_memory_layout.h"
+ #include "hw/i386/fw_cfg.h"
+ #include "hw/virtio/virtio-mmio.h"
+
+ #include "cpu.h"
+ #include "elf.h"
+ #include "kvm_i386.h"
+ #include "hw/xen/start_info.h"
+
+ #define MICROVM_BIOS_FILENAME "bios-microvm.bin"
+
+ /*
+  * Fill the CMOS memory-size registers of the RTC device @s from the RAM
+  * split recorded in the machine state: base memory, extended memory,
+  * memory between 16MiB and 4GiB, and memory above 4GiB.
+  */
+ static void microvm_set_rtc(MicrovmMachineState *mms, ISADevice *s)
+ {
+     X86MachineState *x86ms = X86_MACHINE(mms);
+     int val;
+
+     /* base memory in KiB, capped at 640 */
+     val = MIN(x86ms->below_4g_mem_size / KiB, 640);
+     rtc_set_memory(s, 0x15, val);
+     rtc_set_memory(s, 0x16, val >> 8);
+
+     /* extended memory (next 64MiB): KiB above the first MiB, 16 bits */
+     val = x86ms->below_4g_mem_size > 1 * MiB
+         ? (x86ms->below_4g_mem_size - 1 * MiB) / KiB
+         : 0;
+     val = MIN(val, 65535);
+     rtc_set_memory(s, 0x17, val);
+     rtc_set_memory(s, 0x18, val >> 8);
+     rtc_set_memory(s, 0x30, val);
+     rtc_set_memory(s, 0x31, val >> 8);
+
+     /* memory between 16MiB and 4GiB, in 64KiB units, 16 bits */
+     val = x86ms->below_4g_mem_size > 16 * MiB
+         ? (x86ms->below_4g_mem_size - 16 * MiB) / (64 * KiB)
+         : 0;
+     val = MIN(val, 65535);
+     rtc_set_memory(s, 0x34, val);
+     rtc_set_memory(s, 0x35, val >> 8);
+
+     /* memory above 4GiB, in 64KiB units, spread over three registers */
+     val = x86ms->above_4g_mem_size / 65536;
+     rtc_set_memory(s, 0x5b, val);
+     rtc_set_memory(s, 0x5c, val >> 8);
+     rtc_set_memory(s, 0x5d, val >> 16);
+ }
+
+ /*
+  * GSI handler that forwards interrupt @n at @level directly to the
+  * corresponding IOAPIC input of the GSIState passed as @opaque.
+  */
+ static void microvm_gsi_handler(void *opaque, int n, int level)
+ {
+     GSIState *gsi = opaque;
+
+     qemu_set_irq(gsi->ioapic_irq[n], level);
+ }
+ /*
+  * Create the interrupt wiring and the devices of the machine: the GSI
+  * lines, an ISA bus, the IOAPIC, kvmclock, the virtio-mmio transports
+  * and, depending on the machine properties, the i8259 PIC, the i8254 PIT,
+  * the MC146818 RTC and an ISA serial port. The last step calls
+  * x86_bios_rom_init(), defaulting bios_name to MICROVM_BIOS_FILENAME.
+  */
+ static void microvm_devices_init(MicrovmMachineState *mms)
+ {
+ X86MachineState *x86ms = X86_MACHINE(mms);
+ ISABus *isa_bus;
+ ISADevice *rtc_state;
+ GSIState *gsi_state;
+ int i;
+
+ /*
+  * Core components. With the PIC enabled (ON or AUTO) the shared
+  * gsi_handler is used for the GSI lines; otherwise
+  * microvm_gsi_handler is used.
+  */
+
+ gsi_state = g_malloc0(sizeof(*gsi_state));
+ if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) {
+ x86ms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
+ } else {
+ x86ms->gsi = qemu_allocate_irqs(microvm_gsi_handler,
+ gsi_state, GSI_NUM_PINS);
+ }
+
+ isa_bus = isa_bus_new(NULL, get_system_memory(), get_system_io(),
+ &error_abort);
+ isa_bus_irqs(isa_bus, x86ms->gsi);
+
+ ioapic_init_gsi(gsi_state, "machine");
+
+ kvmclock_create();
+
+ for (i = 0; i < VIRTIO_NUM_TRANSPORTS; i++) { /* transports are 512 bytes apart, one GSI each */
+ sysbus_create_simple("virtio-mmio",
+ VIRTIO_MMIO_BASE + i * 512,
+ x86ms->gsi[VIRTIO_IRQ_BASE + i]);
+ }
+
+ /*
+  * Optional and legacy devices. For the PIC and the PIT, AUTO is
+  * treated like ON; for the RTC, AUTO creates the device only when
+  * kvm_enabled() is false.
+  */
+
+ if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) {
+ qemu_irq *i8259;
+
+ i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq());
+ for (i = 0; i < ISA_NUM_IRQS; i++) {
+ gsi_state->i8259_irq[i] = i8259[i];
+ }
+ g_free(i8259); /* the entries were copied into gsi_state above */
+ }
+
+ if (mms->pit == ON_OFF_AUTO_ON || mms->pit == ON_OFF_AUTO_AUTO) {
+ if (kvm_pit_in_kernel()) {
+ kvm_pit_init(isa_bus, 0x40);
+ } else {
+ i8254_pit_init(isa_bus, 0x40, 0, NULL);
+ }
+ }
+
+ if (mms->rtc == ON_OFF_AUTO_ON ||
+ (mms->rtc == ON_OFF_AUTO_AUTO && !kvm_enabled())) {
+ rtc_state = mc146818_rtc_init(isa_bus, 2000, NULL);
+ microvm_set_rtc(mms, rtc_state);
+ }
+
+ if (mms->isa_serial) {
+ serial_hds_isa_init(isa_bus, 0, 1);
+ }
+
+ if (bios_name == NULL) {
+ bios_name = MICROVM_BIOS_FILENAME;
+ }
+ x86_bios_rom_init(get_system_memory(), true);
+ }
+ /*
+  * Set up guest RAM and the fw_cfg device.
+  *
+  * Splits machine->ram_size into a part mapped at guest address 0 and,
+  * when it does not fit under the low-memory limit, a part mapped at 4GiB;
+  * records both ranges as E820_RAM entries; creates the fw_cfg device and
+  * adds its basic entries; loads the kernel when machine->kernel_filename
+  * is set; and registers the option ROMs when mms->option_roms is set.
+  * The resulting sizes, the fw_cfg handle and the IOAPIC address space are
+  * stored in the X86MachineState.
+  */
+ static void microvm_memory_init(MicrovmMachineState *mms)
+ {
+ MachineState *machine = MACHINE(mms);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+ MemoryRegion *ram, *ram_below_4g, *ram_above_4g;
+ MemoryRegion *system_memory = get_system_memory();
+ FWCfgState *fw_cfg;
+ ram_addr_t lowmem;
+ int i;
+
+ /*
+ * Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
+ * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
+ * also known as MMCFG).
+ * If it doesn't, we need to split it in chunks below and above 4G.
+ * In any case, try to make sure that guest addresses aligned at
+ * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
+ */
+ if (machine->ram_size >= 0xb0000000) {
+ lowmem = 0x80000000;
+ } else {
+ lowmem = 0xb0000000;
+ }
+
+ /*
+ * Handle the machine opt max-ram-below-4g. It is basically doing
+ * min(qemu limit, user limit). An unset (zero) option defaults to 4GiB.
+ */
+ if (!x86ms->max_ram_below_4g) {
+ x86ms->max_ram_below_4g = 4 * GiB;
+ }
+ if (lowmem > x86ms->max_ram_below_4g) {
+ lowmem = x86ms->max_ram_below_4g;
+ if (machine->ram_size - lowmem > lowmem &&
+ lowmem & (1 * GiB - 1)) {
+ warn_report("There is possibly poor performance as the ram size "
+ " (0x%" PRIx64 ") is more then twice the size of"
+ " max-ram-below-4g (%"PRIu64") and"
+ " max-ram-below-4g is not a multiple of 1G.",
+ (uint64_t)machine->ram_size, x86ms->max_ram_below_4g);
+ }
+ }
+
+ if (machine->ram_size > lowmem) {
+ x86ms->above_4g_mem_size = machine->ram_size - lowmem;
+ x86ms->below_4g_mem_size = lowmem;
+ } else {
+ x86ms->above_4g_mem_size = 0;
+ x86ms->below_4g_mem_size = machine->ram_size;
+ }
+
+ ram = g_malloc(sizeof(*ram));
+ memory_region_allocate_system_memory(ram, NULL, "microvm.ram",
+ machine->ram_size);
+
+ ram_below_4g = g_malloc(sizeof(*ram_below_4g)); /* alias of the start of "microvm.ram" */
+ memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
+ 0, x86ms->below_4g_mem_size);
+ memory_region_add_subregion(system_memory, 0, ram_below_4g);
+
+ e820_add_entry(0, x86ms->below_4g_mem_size, E820_RAM);
+
+ if (x86ms->above_4g_mem_size > 0) {
+ ram_above_4g = g_malloc(sizeof(*ram_above_4g)); /* alias of the remainder, mapped at 4GiB */
+ memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
+ x86ms->below_4g_mem_size,
+ x86ms->above_4g_mem_size);
+ memory_region_add_subregion(system_memory, 0x100000000ULL,
+ ram_above_4g);
+ e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM);
+ }
+
+ fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4,
+ &address_space_memory);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, machine->smp.cpus);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus);
+ fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
+ &e820_reserve, sizeof(e820_reserve));
+ fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
+ sizeof(struct e820_entry) * e820_get_num_entries());
+
+ rom_set_fw(fw_cfg);
+
+ if (machine->kernel_filename != NULL) {
+ x86_load_linux(x86ms, fw_cfg, 0, true, true);
+ }
+
+ if (mms->option_roms) {
+ for (i = 0; i < nb_option_roms; i++) {
+ rom_add_option(option_rom[i].name, option_rom[i].bootindex);
+ }
+ }
+
+ x86ms->fw_cfg = fw_cfg;
+ x86ms->ioapic_as = &address_space_memory;
+ }
+
+ /*
+  * Build the kernel command line fragment describing the virtio-mmio
+  * transport whose bus is called @name. The transport index is parsed
+  * from the text following the last '.' in @name.
+  *
+  * Returns a newly allocated string (released by the caller with g_free()),
+  * or NULL if @name has no '.', the index cannot be parsed, or the fragment
+  * does not fit in VIRTIO_CMDLINE_MAXLEN bytes.
+  */
+ static gchar *microvm_get_mmio_cmdline(gchar *name)
+ {
+     gchar *dot = g_strrstr(name, ".");
+     gchar *fragment;
+     long int index;
+     int len;
+
+     if (!dot || qemu_strtol(dot + 1, NULL, 10, &index) != 0) {
+         return NULL;
+     }
+
+     fragment = g_malloc0(VIRTIO_CMDLINE_MAXLEN);
+     len = g_snprintf(fragment, VIRTIO_CMDLINE_MAXLEN,
+                      " virtio_mmio.device=512@0x%lx:%ld",
+                      VIRTIO_MMIO_BASE + index * 512,
+                      VIRTIO_IRQ_BASE + index);
+     if (len >= 0 && len < VIRTIO_CMDLINE_MAXLEN) {
+         return fragment;
+     }
+
+     g_free(fragment);
+     return NULL;
+ }
+
+ /*
+  * Append a "virtio_mmio.device=" fragment to the kernel command line for
+  * every virtio-mmio transport on the default sysbus that has a device
+  * attached, then publish the resulting string through fw_cfg
+  * (FW_CFG_CMDLINE_SIZE / FW_CFG_CMDLINE_DATA).
+  */
+ static void microvm_fix_kernel_cmdline(MachineState *machine)
+ {
+     X86MachineState *x86ms = X86_MACHINE(machine);
+     /* the class being matched does not change while iterating */
+     ObjectClass *mmio_class = object_class_by_name(TYPE_VIRTIO_MMIO);
+     BusState *bus;
+     BusChild *kid;
+     char *cmdline;
+
+     /*
+      * Find MMIO transports with attached devices, and add them to the kernel
+      * command line.
+      *
+      * Yes, this is a hack, but one that heavily improves the UX without
+      * introducing any significant issues.
+      */
+     cmdline = g_strdup(machine->kernel_cmdline);
+     bus = sysbus_get_default();
+     QTAILQ_FOREACH(kid, &bus->children, sibling) {
+         DeviceState *dev = kid->child;
+         ObjectClass *class = object_get_class(OBJECT(dev));
+
+         if (class == mmio_class) {
+             VirtIOMMIOProxy *mmio = VIRTIO_MMIO(OBJECT(dev));
+             VirtioBusState *mmio_virtio_bus = &mmio->bus;
+             BusState *mmio_bus = &mmio_virtio_bus->parent_obj;
+
+             /* only transports with something plugged in are advertised */
+             if (!QTAILQ_EMPTY(&mmio_bus->children)) {
+                 gchar *mmio_cmdline = microvm_get_mmio_cmdline(mmio_bus->name);
+                 if (mmio_cmdline) {
+                     char *newcmd = g_strjoin(NULL, cmdline, mmio_cmdline, NULL);
+                     g_free(mmio_cmdline);
+                     g_free(cmdline);
+                     cmdline = newcmd;
+                 }
+             }
+         }
+     }
+
+     fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
+     fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
+
+     /*
+      * The buffer built above is owned by this function. This relies on
+      * fw_cfg_modify_string() storing its own copy of the string (see
+      * the fw_cfg API); without this free the buffer would be leaked.
+      */
+     g_free(cmdline);
+ }
+
+ /*
+  * Machine init hook (installed as mc->init): sets up guest memory and
+  * fw_cfg first, then the CPUs, then the interrupt wiring and devices.
+  */
+ static void microvm_machine_state_init(MachineState *machine)
+ {
+     MicrovmMachineState *mms = MICROVM_MACHINE(machine);
+     X86MachineState *x86ms = X86_MACHINE(machine);
+
+     microvm_memory_init(mms);
+
+     /* x86_cpus_init() takes no Error argument, so there is nothing to check */
+     x86_cpus_init(x86ms, CPU_VERSION_LATEST);
+
+     microvm_devices_init(mms);
+ }
+
+ /*
+  * Machine reset hook (installed as mc->reset). The first time it runs
+  * with a kernel configured and auto_kernel_cmdline enabled, it rewrites
+  * the kernel command line; it then resets all devices and the APIC of
+  * every CPU that has one.
+  */
+ static void microvm_machine_reset(MachineState *machine)
+ {
+     MicrovmMachineState *state = MICROVM_MACHINE(machine);
+     CPUState *cs;
+
+     if (machine->kernel_filename != NULL && state->auto_kernel_cmdline) {
+         if (!state->kernel_cmdline_fixed) {
+             microvm_fix_kernel_cmdline(machine);
+             state->kernel_cmdline_fixed = true;
+         }
+     }
+
+     qemu_devices_reset();
+
+     CPU_FOREACH(cs) {
+         X86CPU *x86_cpu = X86_CPU(cs);
+
+         if (x86_cpu->apic_state) {
+             device_reset(x86_cpu->apic_state);
+         }
+     }
+ }
+
+ /* Property getter: hands a copy of the PIC OnOffAuto setting to @v. */
+ static void microvm_machine_get_pic(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     OnOffAuto value = MICROVM_MACHINE(obj)->pic;
+
+     visit_type_OnOffAuto(v, name, &value, errp);
+ }
+
+ /* Property setter: lets @v write the PIC OnOffAuto setting in place. */
+ static void microvm_machine_set_pic(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     MicrovmMachineState *state = MICROVM_MACHINE(obj);
+     OnOffAuto *field = &state->pic;
+
+     visit_type_OnOffAuto(v, name, field, errp);
+ }
+
+ /* Property getter: hands a copy of the PIT OnOffAuto setting to @v. */
+ static void microvm_machine_get_pit(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     OnOffAuto value = MICROVM_MACHINE(obj)->pit;
+
+     visit_type_OnOffAuto(v, name, &value, errp);
+ }
+
+ /* Property setter: lets @v write the PIT OnOffAuto setting in place. */
+ static void microvm_machine_set_pit(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     MicrovmMachineState *state = MICROVM_MACHINE(obj);
+     OnOffAuto *field = &state->pit;
+
+     visit_type_OnOffAuto(v, name, field, errp);
+ }
+
+ /* Property getter: hands a copy of the RTC OnOffAuto setting to @v. */
+ static void microvm_machine_get_rtc(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     OnOffAuto value = MICROVM_MACHINE(obj)->rtc;
+
+     visit_type_OnOffAuto(v, name, &value, errp);
+ }
+
+ /* Property setter: lets @v write the RTC OnOffAuto setting in place. */
+ static void microvm_machine_set_rtc(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+ {
+     MicrovmMachineState *state = MICROVM_MACHINE(obj);
+     OnOffAuto *field = &state->rtc;
+
+     visit_type_OnOffAuto(v, name, field, errp);
+ }
+
+ /* Property getter: reports whether the ISA serial port is enabled. */
+ static bool microvm_machine_get_isa_serial(Object *obj, Error **errp)
+ {
+     return MICROVM_MACHINE(obj)->isa_serial;
+ }
+
+ /* Property setter: records whether the ISA serial port is enabled. */
+ static void microvm_machine_set_isa_serial(Object *obj, bool value,
+                                            Error **errp)
+ {
+     MICROVM_MACHINE(obj)->isa_serial = value;
+ }
+
+ /* Property getter: reports whether option ROM loading is enabled. */
+ static bool microvm_machine_get_option_roms(Object *obj, Error **errp)
+ {
+     return MICROVM_MACHINE(obj)->option_roms;
+ }
+
+ /* Property setter: records whether option ROM loading is enabled. */
+ static void microvm_machine_set_option_roms(Object *obj, bool value,
+                                             Error **errp)
+ {
+     MICROVM_MACHINE(obj)->option_roms = value;
+ }
+
+ /* Property getter: reports whether the kernel cmdline is auto-extended. */
+ static bool microvm_machine_get_auto_kernel_cmdline(Object *obj, Error **errp)
+ {
+     return MICROVM_MACHINE(obj)->auto_kernel_cmdline;
+ }
+
+ /* Property setter: records whether the kernel cmdline is auto-extended. */
+ static void microvm_machine_set_auto_kernel_cmdline(Object *obj, bool value,
+                                                     Error **errp)
+ {
+     MICROVM_MACHINE(obj)->auto_kernel_cmdline = value;
+ }
+
+ /*
+  * Instance initializer: every OnOffAuto option starts as AUTO, every
+  * boolean option starts enabled, and the command line is marked as not
+  * yet rewritten.
+  */
+ static void microvm_machine_initfn(Object *obj)
+ {
+     MicrovmMachineState *state = MICROVM_MACHINE(obj);
+
+     /* Configuration: tri-state device switches */
+     state->pic = ON_OFF_AUTO_AUTO;
+     state->pit = ON_OFF_AUTO_AUTO;
+     state->rtc = ON_OFF_AUTO_AUTO;
+
+     /* Configuration: boolean switches */
+     state->isa_serial = true;
+     state->option_roms = true;
+     state->auto_kernel_cmdline = true;
+
+     /* State */
+     state->kernel_cmdline_fixed = false;
+ }
+ /*
+  * Class initializer: fills in the MachineClass fields for the microvm
+  * machine (init and reset hooks, description, CPU limits and defaults)
+  * and registers the class properties that toggle the PIC, PIT, RTC, ISA
+  * serial port, option ROM loading and automatic kernel cmdline handling.
+  */
+ static void microvm_class_init(ObjectClass *oc, void *data)
+ {
+ MachineClass *mc = MACHINE_CLASS(oc);
+
+ mc->init = microvm_machine_state_init;
+
+ mc->family = "microvm_i386";
+ mc->desc = "microvm (i386)";
+ mc->units_per_default_bus = 1;
+ mc->no_floppy = 1;
+ mc->max_cpus = 288;
+ mc->has_hotpluggable_cpus = false;
+ mc->auto_enable_numa_with_memhp = false;
+ mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
+ mc->nvdimm_supported = false;
+
+ /* Avoid relying too much on kernel components */
+ mc->default_kernel_irqchip_split = true;
+
+ /* Machine class handlers */
+ mc->reset = microvm_machine_reset;
+
+ object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto", /* tri-state: on/off/auto */
+ microvm_machine_get_pic,
+ microvm_machine_set_pic,
+ NULL, NULL, &error_abort);
+ object_class_property_set_description(oc, MICROVM_MACHINE_PIC,
+ "Enable i8259 PIC", &error_abort);
+
+ object_class_property_add(oc, MICROVM_MACHINE_PIT, "OnOffAuto",
+ microvm_machine_get_pit,
+ microvm_machine_set_pit,
+ NULL, NULL, &error_abort);
+ object_class_property_set_description(oc, MICROVM_MACHINE_PIT,
+ "Enable i8254 PIT", &error_abort);
+
+ object_class_property_add(oc, MICROVM_MACHINE_RTC, "OnOffAuto",
+ microvm_machine_get_rtc,
+ microvm_machine_set_rtc,
+ NULL, NULL, &error_abort);
+ object_class_property_set_description(oc, MICROVM_MACHINE_RTC,
+ "Enable MC146818 RTC", &error_abort);
+
+ object_class_property_add_bool(oc, MICROVM_MACHINE_ISA_SERIAL, /* boolean switches from here on */
+ microvm_machine_get_isa_serial,
+ microvm_machine_set_isa_serial,
+ &error_abort);
+ object_class_property_set_description(oc, MICROVM_MACHINE_ISA_SERIAL,
+ "Set off to disable the instantiation an ISA serial port",
+ &error_abort);
+
+ object_class_property_add_bool(oc, MICROVM_MACHINE_OPTION_ROMS,
+ microvm_machine_get_option_roms,
+ microvm_machine_set_option_roms,
+ &error_abort);
+ object_class_property_set_description(oc, MICROVM_MACHINE_OPTION_ROMS,
+ "Set off to disable loading option ROMs", &error_abort);
+
+ object_class_property_add_bool(oc, MICROVM_MACHINE_AUTO_KERNEL_CMDLINE,
+ microvm_machine_get_auto_kernel_cmdline,
+ microvm_machine_set_auto_kernel_cmdline,
+ &error_abort);
+ object_class_property_set_description(oc,
+ MICROVM_MACHINE_AUTO_KERNEL_CMDLINE,
+ "Set off to disable adding virtio-mmio devices to the kernel cmdline",
+ &error_abort);
+ }
+ /*
+  * Type description of the microvm machine: a TYPE_X86_MACHINE subtype
+  * with its own instance and class structures and no extra interfaces.
+  */
+ static const TypeInfo microvm_machine_info = {
+ .name = TYPE_MICROVM_MACHINE,
+ .parent = TYPE_X86_MACHINE,
+ .instance_size = sizeof(MicrovmMachineState),
+ .instance_init = microvm_machine_initfn,
+ .class_size = sizeof(MicrovmMachineClass),
+ .class_init = microvm_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { } /* empty terminator entry: no interfaces listed */
+ },
+ };
+
+ /* Register the microvm machine type described by microvm_machine_info. */
+ static void microvm_machine_init(void)
+ {
+     type_register_static(&microvm_machine_info);
+ }
+ type_init(microvm_machine_init);
--- /dev/null
- #include "qapi/qapi-commands-misc-target.h"
+/*
+ * QEMU MC146818 RTC emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/module.h"
+#include "qemu/bcd.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "qemu/timer.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "hw/rtc/mc146818rtc_regs.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
- #define MC146818_RTC(obj) OBJECT_CHECK(RTCState, (obj), TYPE_MC146818_RTC)
-
- typedef struct RTCState {
- ISADevice parent_obj;
-
- MemoryRegion io;
- MemoryRegion coalesced_io;
- uint8_t cmos_data[128];
- uint8_t cmos_index;
- int32_t base_year;
- uint64_t base_rtc;
- uint64_t last_update;
- int64_t offset;
- qemu_irq irq;
- int it_shift;
- /* periodic timer */
- QEMUTimer *periodic_timer;
- int64_t next_periodic_time;
- /* update-ended timer */
- QEMUTimer *update_timer;
- uint64_t next_alarm_time;
- uint16_t irq_reinject_on_ack_count;
- uint32_t irq_coalesced;
- uint32_t period;
- QEMUTimer *coalesced_timer;
- LostTickPolicy lost_tick_policy;
- Notifier suspend_notifier;
- QLIST_ENTRY(RTCState) link;
- } RTCState;
-
+#include "qapi/qapi-events-misc-target.h"
+#include "qapi/visitor.h"
+#include "exec/address-spaces.h"
++#include "hw/rtc/mc146818rtc_regs.h"
+
+#ifdef TARGET_I386
++#include "qapi/qapi-commands-misc-target.h"
+#include "hw/i386/apic.h"
+#endif
+
+//#define DEBUG_CMOS
+//#define DEBUG_COALESCED
+/* CMOS access tracing: printf when DEBUG_CMOS is defined, otherwise a no-op. */
+#ifdef DEBUG_CMOS
+# define CMOS_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
+#else
+# define CMOS_DPRINTF(format, ...) do { } while (0)
+#endif
+/* Coalesced-IRQ tracing: printf when DEBUG_COALESCED is defined, otherwise a no-op. */
+#ifdef DEBUG_COALESCED
+# define DPRINTF_C(format, ...) printf(format, ## __VA_ARGS__)
+#else
+# define DPRINTF_C(format, ...) do { } while (0)
+#endif
+/* Calendar conversion factors. */
+#define SEC_PER_MIN 60
+#define MIN_PER_HOUR 60
+#define SEC_PER_HOUR 3600
+#define HOUR_PER_DAY 24
+#define SEC_PER_DAY 86400
+
+#define RTC_REINJECT_ON_ACK_COUNT 20
+#define RTC_CLOCK_RATE 32768 /* ticks per second of the RTC clock */
+#define UIP_HOLD_LENGTH (8 * NANOSECONDS_PER_SECOND / 32768) /* 8 ticks of a 32768 Hz clock, in ns */
+
- if (period) {
- /* compute 32 khz clock */
- cur_clock =
- muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+static void rtc_set_time(RTCState *s);
+static void rtc_update_time(RTCState *s);
+static void rtc_set_cmos(RTCState *s, const struct tm *tm);
+static inline int rtc_from_bcd(RTCState *s, int a);
+static uint64_t get_next_alarm(RTCState *s);
+
+/*
+ * Report whether the clock is advancing: the SET bit of register B must be
+ * clear and bits 4-6 of register A must not exceed 0x20.
+ */
+static inline bool rtc_running(RTCState *s)
+{
+    if (s->cmos_data[RTC_REG_B] & REG_B_SET) {
+        return false;
+    }
+
+    return (s->cmos_data[RTC_REG_A] & 0x70) <= 0x20;
+}
+
+/*
+ * Return the guest RTC time in nanoseconds: the base time in seconds
+ * scaled to ns, plus the rtc_clock time elapsed since the last update,
+ * plus the stored sub-second offset.
+ */
+static uint64_t get_guest_rtc_ns(RTCState *s)
+{
+    uint64_t now = qemu_clock_get_ns(rtc_clock);
+    uint64_t base_ns = s->base_rtc * NANOSECONDS_PER_SECOND;
+
+    return base_ns + now - s->last_update + s->offset;
+}
+
+/*
+ * Re-arm or cancel the coalesced-interrupt timer. With nothing pending the
+ * timer is deleted; otherwise it is set to fire after a fraction of the
+ * current period, the fraction shrinking as more interrupts are pending.
+ */
+static void rtc_coalesced_timer_update(RTCState *s)
+{
+    int slices;
+    int64_t deadline;
+
+    if (s->irq_coalesced == 0) {
+        timer_del(s->coalesced_timer);
+        return;
+    }
+
+    /* divide each RTC interval to 2 - 8 smaller intervals */
+    slices = MIN(s->irq_coalesced, 7) + 1;
+    deadline = qemu_clock_get_ns(rtc_clock) +
+        periodic_clock_to_ns(s->period / slices);
+    timer_mod(s->coalesced_timer, deadline);
+}
+
+static QLIST_HEAD(, RTCState) rtc_devices =
+ QLIST_HEAD_INITIALIZER(rtc_devices);
+
+#ifdef TARGET_I386
+/*
+ * QMP command handler: drop the count of coalesced interrupts on every
+ * RTC device in the rtc_devices list. @errp is not used.
+ */
+void qmp_rtc_reset_reinjection(Error **errp)
+{
+    RTCState *rtc;
+
+    QLIST_FOREACH(rtc, &rtc_devices, link) {
+        rtc->irq_coalesced = 0;
+    }
+}
+
+/*
+ * Raise the RTC interrupt and report whether the APIC says it was
+ * delivered. The delivery indicator is reset before raising the line.
+ */
+static bool rtc_policy_slew_deliver_irq(RTCState *s)
+{
+    bool delivered;
+
+    apic_reset_irq_delivered();
+    qemu_irq_raise(s->irq);
+    delivered = apic_get_irq_delivered();
+
+    return delivered;
+}
+
+/*
+ * Coalesced-interrupt timer callback: while interrupts are pending, set
+ * the flags in register C and try to inject one; the pending count only
+ * drops when delivery succeeds. The timer is then re-evaluated.
+ */
+static void rtc_coalesced_timer(void *opaque)
+{
+    RTCState *rtc = opaque;
+    bool pending = rtc->irq_coalesced != 0;
+
+    if (pending) {
+        rtc->cmos_data[RTC_REG_C] |= 0xc0;
+        DPRINTF_C("cmos: injecting from timer\n");
+        if (rtc_policy_slew_deliver_irq(rtc)) {
+            rtc->irq_coalesced--;
+            DPRINTF_C("cmos: coalesced irqs decreased to %d\n",
+                      rtc->irq_coalesced);
+        }
+    }
+
+    rtc_coalesced_timer_update(rtc);
+}
+#else
+static bool rtc_policy_slew_deliver_irq(RTCState *s)
+{
+ assert(0); /* variant built when TARGET_I386 is not defined; asserts if it is ever called */
+ return false;
+}
+#endif
+
+/*
+ * Return the periodic interrupt period selected by the low four bits of
+ * register A, as converted by periodic_period_to_clock(), or 0 when the
+ * PIE bit of register B is clear.
+ */
+static uint32_t rtc_periodic_clock_ticks(RTCState *s)
+{
+    int rate_select = s->cmos_data[RTC_REG_A] & 0x0f;
+
+    if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
+        return periodic_period_to_clock(rate_select);
+    }
+
+    return 0;
+}
+
+/*
+ * handle periodic timer. @old_period indicates the periodic timer update
+ * is just due to period adjustment.
+ *
+ * @current_time is a timestamp in nanoseconds; it is converted to
+ * RTC_CLOCK_RATE ticks before use. When rtc_periodic_clock_ticks()
+ * returns 0 the periodic timer is deleted and the coalesced-interrupt
+ * count is cleared. Otherwise the timer is re-armed one period ahead,
+ * minus the clock already elapsed since the last interrupt (lost_clock).
+ */
+static void
+periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
+{
+ uint32_t period;
+ int64_t cur_clock, next_irq_clock, lost_clock = 0;
+
+ period = rtc_periodic_clock_ticks(s);
+
- /*
- * if the periodic timer's update is due to period re-configuration,
- * we should count the clock since last interrupt.
- */
- if (old_period) {
- int64_t last_periodic_clock, next_periodic_clock;
-
- next_periodic_clock = muldiv64(s->next_periodic_time,
- RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
- last_periodic_clock = next_periodic_clock - old_period;
- lost_clock = cur_clock - last_periodic_clock;
- assert(lost_clock >= 0);
- }
++ if (!period) {
++ s->irq_coalesced = 0;
++ timer_del(s->periodic_timer);
++ return;
++ }
+
- /*
++ /* compute 32 khz clock */
++ cur_clock =
++ muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
++
++ /*
++ * if the periodic timer's update is due to period re-configuration,
++ * we should count the clock since last interrupt.
++ */
++ if (old_period) {
++ int64_t last_periodic_clock, next_periodic_clock;
++
++ next_periodic_clock = muldiv64(s->next_periodic_time,
++ RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
++ last_periodic_clock = next_periodic_clock - old_period;
++ lost_clock = cur_clock - last_periodic_clock;
++ assert(lost_clock >= 0);
+
+ /*
+ * s->irq_coalesced can change for two reasons:
+ *
+ * a) if one or more periodic timer interrupts have been lost,
+ * lost_clock will be more than a period.
+ *
+ * b) when the period may be reconfigured, we expect the OS to
+ * treat delayed tick as the new period. So, when switching
+ * from a shorter to a longer period, scale down the missing,
+ * because the OS will treat past delayed ticks as longer
+ * (leftovers are put back into lost_clock). When switching
+ * to a shorter period, scale up the missing ticks since the
+ * OS handler will treat past delayed ticks as shorter.
+ */
+ if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+ uint32_t old_irq_coalesced = s->irq_coalesced;
+
+ s->period = period;
+ lost_clock += old_irq_coalesced * old_period;
+ s->irq_coalesced = lost_clock / s->period;
+ lost_clock %= s->period;
+ if (old_irq_coalesced != s->irq_coalesced ||
+ old_period != s->period) {
+ DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
+ "period scaled from %d to %d\n", old_irq_coalesced,
+ s->irq_coalesced, old_period, s->period);
+ rtc_coalesced_timer_update(s);
+ }
+ } else {
- assert(lost_clock >= 0 && lost_clock <= period);
++ /*
+ * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
+ * is not used, we should make the time progress anyway.
+ */
+ lost_clock = MIN(lost_clock, period);
+ }
++ }
+
- next_irq_clock = cur_clock + period - lost_clock;
- s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
- timer_mod(s->periodic_timer, s->next_periodic_time);
- } else {
- s->irq_coalesced = 0;
- timer_del(s->periodic_timer);
- }
++ assert(lost_clock >= 0 && lost_clock <= period);
+
- RTCState *s;
++ next_irq_clock = cur_clock + period - lost_clock;
++ s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
++ timer_mod(s->periodic_timer, s->next_periodic_time);
+}
+
+/*
+ * Periodic timer callback: re-arm the timer, set the periodic flag in
+ * register C and, if the periodic interrupt is enabled in register B,
+ * raise the interrupt. Under the slew policy an interrupt that could not
+ * be delivered is counted as coalesced for later re-injection.
+ */
+static void rtc_periodic_timer(void *opaque)
+{
+    RTCState *rtc = opaque;
+
+    periodic_timer_update(rtc, rtc->next_periodic_time, 0);
+    rtc->cmos_data[RTC_REG_C] |= REG_C_PF;
+
+    if (!(rtc->cmos_data[RTC_REG_B] & REG_B_PIE)) {
+        return;
+    }
+
+    rtc->cmos_data[RTC_REG_C] |= REG_C_IRQF;
+    if (rtc->lost_tick_policy != LOST_TICK_POLICY_SLEW) {
+        qemu_irq_raise(rtc->irq);
+        return;
+    }
+
+    if (rtc->irq_reinject_on_ack_count >= RTC_REINJECT_ON_ACK_COUNT) {
+        rtc->irq_reinject_on_ack_count = 0;
+    }
+    if (!rtc_policy_slew_deliver_irq(rtc)) {
+        rtc->irq_coalesced++;
+        rtc_coalesced_timer_update(rtc);
+        DPRINTF_C("cmos: coalesced irqs increased to %d\n",
+                  rtc->irq_coalesced);
+    }
+}
+
+/*
+ * handle update-ended timer: program s->update_timer for the next second
+ * boundary or for the next alarm, or delete it when neither UF nor AF can
+ * change. Unless the dividers are held in reset, s->next_alarm_time is
+ * recomputed as well.
+ */
+static void check_update_timer(RTCState *s)
+{
+ uint64_t next_update_time;
+ uint64_t guest_nsec;
+ int next_alarm_sec;
+
+ /* From the data sheet: "Holding the dividers in reset prevents
+ * interrupts from operating, while setting the SET bit allows"
+ * them to occur.
+ */
+ if ((s->cmos_data[RTC_REG_A] & 0x60) == 0x60) {
+ assert((s->cmos_data[RTC_REG_A] & REG_A_UIP) == 0);
+ timer_del(s->update_timer);
+ return;
+ }
+
+ guest_nsec = get_guest_rtc_ns(s) % NANOSECONDS_PER_SECOND; /* ns into the current guest second */
+ next_update_time = qemu_clock_get_ns(rtc_clock)
+ + NANOSECONDS_PER_SECOND - guest_nsec;
+
+ /* Compute time of next alarm. One second is already accounted
+ * for in next_update_time.
+ */
+ next_alarm_sec = get_next_alarm(s);
+ s->next_alarm_time = next_update_time +
+ (next_alarm_sec - 1) * NANOSECONDS_PER_SECOND;
+
+ /* If update_in_progress latched the UIP bit, we must keep the timer
+ * programmed to the next second, so that UIP is cleared. Otherwise,
+ * if UF is already set, we might be able to optimize.
+ */
+ if (!(s->cmos_data[RTC_REG_A] & REG_A_UIP) &&
+ (s->cmos_data[RTC_REG_C] & REG_C_UF)) {
+ /* If AF cannot change (i.e. either it is set already, or
+ * SET=1 and then the time is not updated), nothing to do.
+ */
+ if ((s->cmos_data[RTC_REG_B] & REG_B_SET) ||
+ (s->cmos_data[RTC_REG_C] & REG_C_AF)) {
+ timer_del(s->update_timer);
+ return;
+ }
+
+ /* UF is set, but AF is clear. Program the timer to target
+ * the alarm time. */
+ next_update_time = s->next_alarm_time;
+ }
+ if (next_update_time != timer_expire_time_ns(s->update_timer)) { /* skip the re-arm when the deadline is unchanged */
+ timer_mod(s->update_timer, next_update_time);
+ }
+}
+
+/*
+ * Convert @hour to a 24-hour value. When the 24H bit of register B is set
+ * the value is returned unchanged; otherwise it is reduced modulo 12 and
+ * 12 is added if bit 7 of the RTC_HOURS register is set.
+ */
+static inline uint8_t convert_hour(RTCState *s, uint8_t hour)
+{
+    if (s->cmos_data[RTC_REG_B] & REG_B_24H) {
+        return hour;
+    }
+
+    hour %= 12;
+    if (s->cmos_data[RTC_HOURS] & 0x80) {
+        hour += 12;
+    }
+    return hour;
+}
+/*
+ * Compute the number of seconds from the current RTC time until the alarm
+ * registers next match. The CMOS time is refreshed first. rtc_from_bcd()
+ * yields -1 for a "don't care" alarm field; such fields are resolved
+ * against the current time below. A non-positive difference is moved
+ * forward by one day (SEC_PER_DAY) before it is returned.
+ */
+static uint64_t get_next_alarm(RTCState *s)
+{
+ int32_t alarm_sec, alarm_min, alarm_hour, cur_hour, cur_min, cur_sec;
+ int32_t hour, min, sec;
+
+ rtc_update_time(s);
+
+ alarm_sec = rtc_from_bcd(s, s->cmos_data[RTC_SECONDS_ALARM]);
+ alarm_min = rtc_from_bcd(s, s->cmos_data[RTC_MINUTES_ALARM]);
+ alarm_hour = rtc_from_bcd(s, s->cmos_data[RTC_HOURS_ALARM]);
+ alarm_hour = alarm_hour == -1 ? -1 : convert_hour(s, alarm_hour); /* keep the don't-care marker intact */
+
+ cur_sec = rtc_from_bcd(s, s->cmos_data[RTC_SECONDS]);
+ cur_min = rtc_from_bcd(s, s->cmos_data[RTC_MINUTES]);
+ cur_hour = rtc_from_bcd(s, s->cmos_data[RTC_HOURS]);
+ cur_hour = convert_hour(s, cur_hour);
+
+ if (alarm_hour == -1) {
+ alarm_hour = cur_hour;
+ if (alarm_min == -1) {
+ alarm_min = cur_min;
+ if (alarm_sec == -1) {
+ alarm_sec = cur_sec + 1;
+ } else if (cur_sec > alarm_sec) {
+ alarm_min++;
+ }
+ } else if (cur_min == alarm_min) {
+ if (alarm_sec == -1) {
+ alarm_sec = cur_sec + 1;
+ } else {
+ if (cur_sec > alarm_sec) {
+ alarm_hour++;
+ }
+ }
+ if (alarm_sec == SEC_PER_MIN) {
+ /* wrap to next hour, minutes is not in don't care mode */
+ alarm_sec = 0;
+ alarm_hour++;
+ }
+ } else if (cur_min > alarm_min) {
+ alarm_hour++;
+ }
+ } else if (cur_hour == alarm_hour) {
+ if (alarm_min == -1) {
+ alarm_min = cur_min;
+ if (alarm_sec == -1) {
+ alarm_sec = cur_sec + 1;
+ } else if (cur_sec > alarm_sec) {
+ alarm_min++;
+ }
+
+ if (alarm_sec == SEC_PER_MIN) {
+ alarm_sec = 0;
+ alarm_min++;
+ }
+ /* wrap to next day, hour is not in don't care mode */
+ alarm_min %= MIN_PER_HOUR;
+ } else if (cur_min == alarm_min) {
+ if (alarm_sec == -1) {
+ alarm_sec = cur_sec + 1;
+ }
+ /* wrap to next day, hours+minutes not in don't care mode */
+ alarm_sec %= SEC_PER_MIN;
+ }
+ }
+
+ /* values that are still don't care fire at the next min/sec */
+ if (alarm_min == -1) {
+ alarm_min = 0;
+ }
+ if (alarm_sec == -1) {
+ alarm_sec = 0;
+ }
+
+ /* keep values in range */
+ if (alarm_sec == SEC_PER_MIN) {
+ alarm_sec = 0;
+ alarm_min++;
+ }
+ if (alarm_min == MIN_PER_HOUR) {
+ alarm_min = 0;
+ alarm_hour++;
+ }
+ alarm_hour %= HOUR_PER_DAY;
+
+ hour = alarm_hour - cur_hour;
+ min = hour * MIN_PER_HOUR + alarm_min - cur_min;
+ sec = min * SEC_PER_MIN + alarm_sec - cur_sec;
+ return sec <= 0 ? sec + SEC_PER_DAY : sec;
+}
+
+/*
+ * Update-ended timer callback: refresh the CMOS time, clear UIP, set the
+ * update flag (and the alarm flag once the alarm time has been reached),
+ * raise the interrupt if a newly set flag is enabled in register B, and
+ * re-program the timer.
+ */
+static void rtc_update_timer(void *opaque)
+{
+    RTCState *s = opaque;
+    int32_t flags = REG_C_UF;
+    int32_t fresh_flags;
+    bool alarm_due;
+
+    assert((s->cmos_data[RTC_REG_A] & 0x60) != 0x60);
+
+    /* UIP might have been latched, update time and clear it. */
+    rtc_update_time(s);
+    s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+
+    alarm_due = qemu_clock_get_ns(rtc_clock) >= s->next_alarm_time;
+    if (alarm_due) {
+        flags |= REG_C_AF;
+        if (s->cmos_data[RTC_REG_B] & REG_B_AIE) {
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_RTC, NULL);
+        }
+    }
+
+    /* only flags that were not already pending may trigger an interrupt */
+    fresh_flags = flags & ~s->cmos_data[RTC_REG_C];
+    s->cmos_data[RTC_REG_C] |= flags;
+    if (fresh_flags & s->cmos_data[RTC_REG_B]) {
+        s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
+        qemu_irq_raise(s->irq);
+    }
+
+    check_update_timer(s);
+}
+/*
+ * I/O write handler for the RTC ports. A write to an even address latches
+ * the CMOS index (low 7 bits of @data). A write to an odd address stores
+ * @data into the currently indexed register, with dedicated handling for
+ * the alarm registers, the time/date registers and control registers A
+ * and B. Writes to registers C and D are ignored.
+ */
+static void cmos_ioport_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ RTCState *s = opaque;
+ uint32_t old_period;
+ bool update_periodic_timer;
+
+ if ((addr & 1) == 0) {
+ s->cmos_index = data & 0x7f;
+ } else {
+ CMOS_DPRINTF("cmos: write index=0x%02x val=0x%02" PRIx64 "\n",
+ s->cmos_index, data);
+ switch(s->cmos_index) {
+ case RTC_SECONDS_ALARM:
+ case RTC_MINUTES_ALARM:
+ case RTC_HOURS_ALARM:
+ s->cmos_data[s->cmos_index] = data;
+ check_update_timer(s);
+ break;
+ case RTC_IBM_PS2_CENTURY_BYTE:
+ s->cmos_index = RTC_CENTURY; /* redirect to the RTC_CENTURY register */
+ /* fall through */
+ case RTC_CENTURY:
+ case RTC_SECONDS:
+ case RTC_MINUTES:
+ case RTC_HOURS:
+ case RTC_DAY_OF_WEEK:
+ case RTC_DAY_OF_MONTH:
+ case RTC_MONTH:
+ case RTC_YEAR:
+ s->cmos_data[s->cmos_index] = data;
+ /* if in set mode, do not update the time */
+ if (rtc_running(s)) {
+ rtc_set_time(s);
+ check_update_timer(s);
+ }
+ break;
+ case RTC_REG_A:
+ update_periodic_timer = (s->cmos_data[RTC_REG_A] ^ data) & 0x0f; /* true when the low four bits change */
+ old_period = rtc_periodic_clock_ticks(s);
+
+ if ((data & 0x60) == 0x60) {
+ if (rtc_running(s)) {
+ rtc_update_time(s);
+ }
+ /* What happens to UIP when divider reset is enabled is
+ * unclear from the datasheet. Shouldn't matter much
+ * though.
+ */
+ s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+ } else if (((s->cmos_data[RTC_REG_A] & 0x60) == 0x60) &&
+ (data & 0x70) <= 0x20) {
+ /* when the divider reset is removed, the first update cycle
+ * begins one-half second later*/
+ if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+ s->offset = 500000000;
+ rtc_set_time(s);
+ }
+ s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+ }
+ /* UIP bit is read only */
+ s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
+ (s->cmos_data[RTC_REG_A] & REG_A_UIP);
+
+ if (update_periodic_timer) {
+ periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
+ old_period);
+ }
+
+ check_update_timer(s);
+ break;
+ case RTC_REG_B:
+ update_periodic_timer = (s->cmos_data[RTC_REG_B] ^ data)
+ & REG_B_PIE; /* true when the PIE bit changes */
+ old_period = rtc_periodic_clock_ticks(s);
+
+ if (data & REG_B_SET) {
+ /* update cmos to when the rtc was stopping */
+ if (rtc_running(s)) {
+ rtc_update_time(s);
+ }
+ /* set mode: reset UIP mode */
+ s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+ data &= ~REG_B_UIE;
+ } else {
+ /* if disabling set mode, update the time */
+ if ((s->cmos_data[RTC_REG_B] & REG_B_SET) &&
+ (s->cmos_data[RTC_REG_A] & 0x70) <= 0x20) {
+ s->offset = get_guest_rtc_ns(s) % NANOSECONDS_PER_SECOND;
+ rtc_set_time(s);
+ }
+ }
+ /* if an interrupt flag is already set when the interrupt
+ * becomes enabled, raise an interrupt immediately. */
+ if (data & s->cmos_data[RTC_REG_C] & REG_C_MASK) {
+ s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
+ qemu_irq_raise(s->irq);
+ } else {
+ s->cmos_data[RTC_REG_C] &= ~REG_C_IRQF;
+ qemu_irq_lower(s->irq);
+ }
+ s->cmos_data[RTC_REG_B] = data;
+
+ if (update_periodic_timer) {
+ periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
+ old_period);
+ }
+
+ check_update_timer(s);
+ break;
+ case RTC_REG_C:
+ case RTC_REG_D:
+ /* cannot write to them */
+ break;
+ default:
+ s->cmos_data[s->cmos_index] = data; /* all other indexes are stored as-is */
+ break;
+ }
+ }
+}
+
+/*
+ * Encode @a for storage in a CMOS time register.
+ *
+ * When REG_B_DM is set in register B the value is returned unchanged;
+ * otherwise it is packed as BCD (tens in the high nibble, units in the
+ * low nibble).
+ */
+static inline int rtc_to_bcd(RTCState *s, int a)
+{
+    bool keep_binary = (s->cmos_data[RTC_REG_B] & REG_B_DM) != 0;
+
+    return keep_binary ? a : (((a / 10) << 4) | (a % 10));
+}
+
+/*
+ * Decode a CMOS time register value @a.
+ *
+ * Returns -1 when both of the top two bits of @a are set.  Otherwise
+ * returns @a unchanged when REG_B_DM is set in register B, or the BCD
+ * value converted to an integer (high nibble * 10 + low nibble).
+ */
+static inline int rtc_from_bcd(RTCState *s, int a)
+{
+    const int top_two_bits = a & 0xc0;
+
+    if (top_two_bits == 0xc0) {
+        return -1;
+    }
+
+    return (s->cmos_data[RTC_REG_B] & REG_B_DM)
+           ? a
+           : ((a >> 4) * 10) + (a & 0x0f);
+}
+
+/*
+ * Fill @tm from the CMOS time registers of @s.
+ *
+ * Only tm_sec, tm_min, tm_hour, tm_wday, tm_mday, tm_mon and tm_year are
+ * written.  Each register is decoded with rtc_from_bcd(), so a register
+ * that rtc_from_bcd() rejects contributes -1 to the corresponding field.
+ */
+static void rtc_get_time(RTCState *s, struct tm *tm)
+{
+    /* bit 7 of the hours register is masked off before decoding */
+    int hour = rtc_from_bcd(s, s->cmos_data[RTC_HOURS] & 0x7f);
+    int year_in_century = rtc_from_bcd(s, s->cmos_data[RTC_YEAR]);
+    int century = rtc_from_bcd(s, s->cmos_data[RTC_CENTURY]);
+
+    if (!(s->cmos_data[RTC_REG_B] & REG_B_24H)) {
+        /* 12 hour format: bit 7 of the hours register adds twelve hours */
+        hour %= 12;
+        if (s->cmos_data[RTC_HOURS] & 0x80) {
+            hour += 12;
+        }
+    }
+
+    tm->tm_sec = rtc_from_bcd(s, s->cmos_data[RTC_SECONDS]);
+    tm->tm_min = rtc_from_bcd(s, s->cmos_data[RTC_MINUTES]);
+    tm->tm_hour = hour;
+    /* CMOS weekday and month are stored one-based, struct tm is zero-based */
+    tm->tm_wday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_WEEK]) - 1;
+    tm->tm_mday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_MONTH]);
+    tm->tm_mon = rtc_from_bcd(s, s->cmos_data[RTC_MONTH]) - 1;
+    tm->tm_year = year_in_century + s->base_year + century * 100 - 1900;
+}
+
+/*
+ * Re-base the running clock on the date currently held in CMOS.
+ *
+ * base_rtc becomes mktimegm() of the CMOS date, last_update becomes the
+ * current rtc_clock reading, and an rtc-change event is sent carrying
+ * qemu_timedate_diff() of that date.
+ */
+static void rtc_set_time(RTCState *s)
+{
+    struct tm cmos_date;
+
+    rtc_get_time(s, &cmos_date);
+
+    s->base_rtc = mktimegm(&cmos_date);
+    s->last_update = qemu_clock_get_ns(rtc_clock);
+
+    qapi_event_send_rtc_change(qemu_timedate_diff(&cmos_date));
+}
+
+/*
+ * Store the broken-down time @tm into the CMOS time registers of @s.
+ *
+ * Every value goes through rtc_to_bcd().  The hour is written either in
+ * 24 hour form or, when REG_B_24H is clear, as 1..12 with bit 7 set for
+ * hours >= 12.  Year and century are derived from the year relative to
+ * s->base_year.
+ */
+static void rtc_set_cmos(RTCState *s, const struct tm *tm)
+{
+    const bool use_24h = (s->cmos_data[RTC_REG_B] & REG_B_24H) != 0;
+    int rel_year = tm->tm_year + 1900 - s->base_year;
+
+    s->cmos_data[RTC_SECONDS] = rtc_to_bcd(s, tm->tm_sec);
+    s->cmos_data[RTC_MINUTES] = rtc_to_bcd(s, tm->tm_min);
+
+    if (use_24h) {
+        s->cmos_data[RTC_HOURS] = rtc_to_bcd(s, tm->tm_hour);
+    } else {
+        /* 12 hour format: a remainder of zero is presented as twelve */
+        int hour12 = tm->tm_hour % 12;
+
+        if (hour12 == 0) {
+            hour12 = 12;
+        }
+        s->cmos_data[RTC_HOURS] = rtc_to_bcd(s, hour12);
+        if (tm->tm_hour >= 12) {
+            s->cmos_data[RTC_HOURS] |= 0x80;
+        }
+    }
+
+    /* struct tm weekday and month are zero-based, CMOS stores them one-based */
+    s->cmos_data[RTC_DAY_OF_WEEK] = rtc_to_bcd(s, tm->tm_wday + 1);
+    s->cmos_data[RTC_DAY_OF_MONTH] = rtc_to_bcd(s, tm->tm_mday);
+    s->cmos_data[RTC_MONTH] = rtc_to_bcd(s, tm->tm_mon + 1);
+    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, rel_year % 100);
+    s->cmos_data[RTC_CENTURY] = rtc_to_bcd(s, rel_year / 100);
+}
+
+/*
+ * Refresh the CMOS time registers from the guest clock.
+ *
+ * The guest time is always sampled and converted with gmtime_r(), but the
+ * registers are only rewritten while REG_B_SET is clear in register B.
+ */
+static void rtc_update_time(RTCState *s)
+{
+    int64_t now_ns = get_guest_rtc_ns(s);
+    time_t now_sec = now_ns / NANOSECONDS_PER_SECOND;
+    struct tm now_tm;
+
+    gmtime_r(&now_sec, &now_tm);
+
+    if (s->cmos_data[RTC_REG_B] & REG_B_SET) {
+        /* SET flag of Register B is enabled: leave the registers alone */
+        return;
+    }
+    rtc_set_cmos(s, &now_tm);
+}
+
+/*
+ * Report whether an update cycle should currently be visible to the guest.
+ *
+ * Returns 0 when the clock is not running.  Returns 1 (and latches
+ * REG_A_UIP in register A) when the update timer is pending and will
+ * expire within UIP_HOLD_LENGTH.  Otherwise returns 1 only when the guest
+ * clock is within the last UIP_HOLD_LENGTH nanoseconds of a second.
+ */
+static int update_in_progress(RTCState *s)
+{
+    int64_t ns_now;
+
+    if (!rtc_running(s)) {
+        return 0;
+    }
+
+    if (timer_pending(s->update_timer)) {
+        int64_t expiry = timer_expire_time_ns(s->update_timer);
+
+        /* Latch UIP until the timer expires. */
+        if (qemu_clock_get_ns(rtc_clock) >=
+            (expiry - UIP_HOLD_LENGTH)) {
+            s->cmos_data[RTC_REG_A] |= REG_A_UIP;
+            return 1;
+        }
+    }
+
+    /* UIP bit will be set at last 244us of every second. */
+    ns_now = get_guest_rtc_ns(s);
+    return (ns_now % NANOSECONDS_PER_SECOND) >=
+           (NANOSECONDS_PER_SECOND - UIP_HOLD_LENGTH) ? 1 : 0;
+}
+
+/*
+ * Read handler installed in cmos_ops.
+ *
+ * A read from an even @addr always returns 0xff.  A read from an odd
+ * @addr returns the register selected by s->cmos_index:
+ *  - time registers are refreshed via rtc_update_time() first while the
+ *    clock is running;
+ *  - register A has REG_A_UIP OR-ed into the returned value when
+ *    update_in_progress() reports an update cycle;
+ *  - register C is returned and then cleared, and the interrupt line is
+ *    lowered;
+ *  - everything else is returned as stored.
+ *
+ * @opaque is the RTCState; @size is not used by this function.
+ */
+static uint64_t cmos_ioport_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ RTCState *s = opaque;
+ int ret;
+ if ((addr & 1) == 0) {
+ /* the index port is not readable */
+ return 0xff;
+ } else {
+ switch(s->cmos_index) {
+ case RTC_IBM_PS2_CENTURY_BYTE:
+ /* redirect to RTC_CENTURY; note the latched index itself is rewritten */
+ s->cmos_index = RTC_CENTURY;
+ /* fall through */
+ case RTC_CENTURY:
+ case RTC_SECONDS:
+ case RTC_MINUTES:
+ case RTC_HOURS:
+ case RTC_DAY_OF_WEEK:
+ case RTC_DAY_OF_MONTH:
+ case RTC_MONTH:
+ case RTC_YEAR:
+ /* if not in set mode, calibrate cmos before
+ * reading */
+ if (rtc_running(s)) {
+ rtc_update_time(s);
+ }
+ ret = s->cmos_data[s->cmos_index];
+ break;
+ case RTC_REG_A:
+ /* UIP is computed on the fly and merged into the returned value */
+ ret = s->cmos_data[s->cmos_index];
+ if (update_in_progress(s)) {
+ ret |= REG_A_UIP;
+ }
+ break;
+ case RTC_REG_C:
+ /* read-to-clear: hand back the flags, lower the IRQ, zero the register */
+ ret = s->cmos_data[s->cmos_index];
+ qemu_irq_lower(s->irq);
+ s->cmos_data[RTC_REG_C] = 0x00;
+ if (ret & (REG_C_UF | REG_C_AF)) {
+ check_update_timer(s);
+ }
+
+ /* with coalesced interrupts outstanding and PIE enabled, set PF and
+ * IRQF again right away; the number of such reinjections is bounded
+ * by RTC_REINJECT_ON_ACK_COUNT. irq_coalesced is decremented only
+ * when rtc_policy_slew_deliver_irq() returns nonzero. */
+ if(s->irq_coalesced &&
+ (s->cmos_data[RTC_REG_B] & REG_B_PIE) &&
+ s->irq_reinject_on_ack_count < RTC_REINJECT_ON_ACK_COUNT) {
+ s->irq_reinject_on_ack_count++;
+ s->cmos_data[RTC_REG_C] |= REG_C_IRQF | REG_C_PF;
+ DPRINTF_C("cmos: injecting on ack\n");
+ if (rtc_policy_slew_deliver_irq(s)) {
+ s->irq_coalesced--;
+ DPRINTF_C("cmos: coalesced irqs decreased to %d\n",
+ s->irq_coalesced);
+ }
+ }
+ break;
+ default:
+ ret = s->cmos_data[s->cmos_index];
+ break;
+ }
+ CMOS_DPRINTF("cmos: read index=0x%02x val=0x%02x\n",
+ s->cmos_index, ret);
+ return ret;
+ }
+}
+
+/*
+ * Store @val at CMOS index @addr of the RTC device @dev.
+ *
+ * Indexes outside 0..127 are silently ignored.
+ */
+void rtc_set_memory(ISADevice *dev, int addr, int val)
+{
+    RTCState *rtc = MC146818_RTC(dev);
+
+    if (addr < 0 || addr > 127) {
+        return;
+    }
+    rtc->cmos_data[addr] = val;
+}
+
+/*
+ * Return the byte stored at CMOS index @addr of the RTC device @dev.
+ *
+ * @addr must be within 0..127; this is enforced with assert().
+ */
+int rtc_get_memory(ISADevice *dev, int addr)
+{
+    RTCState *rtc = MC146818_RTC(dev);
+
+    assert(addr >= 0 && addr <= 127);
+
+    return rtc->cmos_data[addr];
+}
+
+/*
+ * Initialise the clock of @dev from qemu_get_timedate().
+ *
+ * base_rtc is set to that date, last_update to the current rtc_clock
+ * reading, offset to zero, and the CMOS time registers are filled in.
+ */
+static void rtc_set_date_from_host(ISADevice *dev)
+{
+    RTCState *rtc = MC146818_RTC(dev);
+    struct tm start_date;
+
+    qemu_get_timedate(&start_date, 0);
+
+    rtc->base_rtc = mktimegm(&start_date);
+    rtc->last_update = qemu_clock_get_ns(rtc_clock);
+    rtc->offset = 0;
+
+    /* set the CMOS date */
+    rtc_set_cmos(rtc, &start_date);
+}
+
+/*
+ * vmstate pre_save hook: bring the CMOS time registers up to date via
+ * rtc_update_time() before the state is written out.  Always returns 0.
+ */
+static int rtc_pre_save(void *opaque)
+{
+    RTCState *rtc = opaque;
+
+    rtc_update_time(rtc);
+    return 0;
+}
+
+/*
+ * vmstate post_load hook.  Always returns 0.
+ *
+ * - For streams of version <= 2, or when rtc_clock is
+ *   QEMU_CLOCK_REALTIME, the clock is re-based on the loaded CMOS date
+ *   with a zero offset and the update timer is re-evaluated.
+ * - Outside record/replay, the periodic timer is reprogrammed when the
+ *   loaded next_periodic_time is in the future or further in the past
+ *   than get_max_clock_jump().
+ * - For version >= 2 with the slew lost-tick policy, the coalesced timer
+ *   is refreshed.
+ */
+static int rtc_post_load(void *opaque, int version_id)
+{
+    RTCState *rtc = opaque;
+
+    if (version_id <= 2 || rtc_clock == QEMU_CLOCK_REALTIME) {
+        rtc_set_time(rtc);
+        rtc->offset = 0;
+        check_update_timer(rtc);
+    }
+
+    /* The periodic timer is deterministic in record/replay mode,
+     * so there is no need to update it after loading the vmstate.
+     * Reading RTC here would misalign record and replay.
+     */
+    if (replay_mode == REPLAY_MODE_NONE) {
+        uint64_t now = qemu_clock_get_ns(rtc_clock);
+
+        if (now < rtc->next_periodic_time ||
+            now > (rtc->next_periodic_time + get_max_clock_jump())) {
+            periodic_timer_update(rtc, qemu_clock_get_ns(rtc_clock), 0);
+        }
+    }
+
+    if (version_id >= 2 &&
+        rtc->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        rtc_coalesced_timer_update(rtc);
+    }
+
+    return 0;
+}
+
+/*
+ * Subsection predicate: the irq_reinject_on_ack_count subsection is only
+ * needed while that counter is non-zero.
+ */
+static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
+{
+    RTCState *rtc = opaque;
+
+    return rtc->irq_reinject_on_ack_count != 0;
+}
+
+/*
+ * Optional vmstate subsection carrying irq_reinject_on_ack_count as a
+ * 16-bit value.  It is gated by rtc_irq_reinject_on_ack_count_needed()
+ * and is listed in the subsections of vmstate_rtc.
+ */
+static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = {
+ .name = "mc146818rtc/irq_reinject_on_ack_count",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = rtc_irq_reinject_on_ack_count_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Main vmstate description of the device (current version 3, oldest
+ * accepted version 1), with rtc_pre_save()/rtc_post_load() as hooks.
+ *
+ * NOTE(review): the trailing number on the *_V entries is presumably the
+ * first stream version that carries the field; rtc_post_load() treats
+ * version_id <= 2 streams specially, which is consistent with that.
+ * The field order defines the stream layout - do not reorder.
+ */
+static const VMStateDescription vmstate_rtc = {
+ .name = "mc146818rtc",
+ .version_id = 3,
+ .minimum_version_id = 1,
+ .pre_save = rtc_pre_save,
+ .post_load = rtc_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_BUFFER(cmos_data, RTCState),
+ VMSTATE_UINT8(cmos_index, RTCState),
+ VMSTATE_UNUSED(7*4),
+ VMSTATE_TIMER_PTR(periodic_timer, RTCState),
+ VMSTATE_INT64(next_periodic_time, RTCState),
+ VMSTATE_UNUSED(3*8),
+ VMSTATE_UINT32_V(irq_coalesced, RTCState, 2),
+ VMSTATE_UINT32_V(period, RTCState, 2),
+ VMSTATE_UINT64_V(base_rtc, RTCState, 3),
+ VMSTATE_UINT64_V(last_update, RTCState, 3),
+ VMSTATE_INT64_V(offset, RTCState, 3),
+ VMSTATE_TIMER_PTR_V(update_timer, RTCState, 3),
+ VMSTATE_UINT64_V(next_alarm_time, RTCState, 3),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_rtc_irq_reinject_on_ack_count,
+ NULL
+ }
+};
+
+/*
+ * Suspend notifier: write 0xFE (S3 resume) into the CMOS shutdown status
+ * register at index 0xF.  The BIOS will read it and start S3 resume at
+ * POST entry.  @data is not used.
+ */
+static void rtc_notify_suspend(Notifier *notifier, void *data)
+{
+    RTCState *rtc = container_of(notifier, RTCState, suspend_notifier);
+    ISADevice *isadev = ISA_DEVICE(rtc);
+
+    rtc_set_memory(isadev, 0xF, 0xFE);
+}
+
+/*
+ * Reset handler registered with qemu_register_reset().
+ *
+ * Clears PIE, AIE and SQWE in register B and UF, IRQF, PF and AF in
+ * register C, re-evaluates the update timer and lowers the interrupt
+ * line.  With the slew lost-tick policy the coalescing counters are
+ * zeroed as well.
+ */
+static void rtc_reset(void *opaque)
+{
+    RTCState *rtc = opaque;
+    const int reg_b_cleared = REG_B_PIE | REG_B_AIE | REG_B_SQWE;
+    const int reg_c_cleared = REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF;
+
+    rtc->cmos_data[RTC_REG_B] &= ~reg_b_cleared;
+    rtc->cmos_data[RTC_REG_C] &= ~reg_c_cleared;
+    check_update_timer(rtc);
+
+    qemu_irq_lower(rtc->irq);
+
+    if (rtc->lost_tick_policy != LOST_TICK_POLICY_SLEW) {
+        return;
+    }
+    rtc->irq_coalesced = 0;
+    rtc->irq_reinject_on_ack_count = 0;
+}
+
+/*
+ * Access callbacks for the RTC I/O regions created in rtc_realizefn().
+ * The implementation is declared for 1-byte accesses only
+ * (min/max access size 1), little-endian.
+ */
+static const MemoryRegionOps cmos_ops = {
+ .read = cmos_ioport_read,
+ .write = cmos_ioport_write,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 1,
+ },
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Getter behind the "date" property: refresh the CMOS registers from the
+ * guest clock, then decode them into @current_tm.  @errp is never set.
+ */
+static void rtc_get_date(Object *obj, struct tm *current_tm, Error **errp)
+{
+    RTCState *rtc = MC146818_RTC(obj);
+
+    rtc_update_time(rtc);
+    rtc_get_time(rtc, current_tm);
+}
+
+/*
+ * Realize callback of the device.
+ *
+ * Puts registers A-D into their initial values, seeds the date via
+ * rtc_set_date_from_host(), creates the timers, and registers the suspend
+ * notifier, the 2-byte I/O region at port 0x70 (with a 1-byte coalesced
+ * sub-region at offset 0), the reset handler, the "date" property and one
+ * GPIO output for the interrupt.  Finally the device is linked into
+ * rtc_devices.
+ *
+ * If lost_tick_policy is not handled by the switch below, @errp is set
+ * and the function returns before any timer or region is created.
+ */
+static void rtc_realizefn(DeviceState *dev, Error **errp)
+{
+ ISADevice *isadev = ISA_DEVICE(dev);
+ RTCState *s = MC146818_RTC(dev);
+ int base = 0x70;
+
+ /* initial register contents */
+ s->cmos_data[RTC_REG_A] = 0x26;
+ s->cmos_data[RTC_REG_B] = 0x02;
+ s->cmos_data[RTC_REG_C] = 0x00;
+ s->cmos_data[RTC_REG_D] = 0x80;
+
+ /* This is for historical reasons. The default base year qdev property
+ * was set to 2000 for most machine types before the century byte was
+ * implemented.
+ *
+ * This if statement means that the century byte will be always 0
+ * (at least until 2079...) for base_year = 1980, but will be set
+ * correctly for base_year = 2000.
+ */
+ if (s->base_year == 2000) {
+ s->base_year = 0;
+ }
+
+ rtc_set_date_from_host(isadev);
+
+ /* the slew policy is only accepted when built with TARGET_I386 */
+ switch (s->lost_tick_policy) {
+#ifdef TARGET_I386
+ case LOST_TICK_POLICY_SLEW:
+ s->coalesced_timer =
+ timer_new_ns(rtc_clock, rtc_coalesced_timer, s);
+ break;
+#endif
+ case LOST_TICK_POLICY_DISCARD:
+ break;
+ default:
+ error_setg(errp, "Invalid lost tick policy.");
+ return;
+ }
+
+ s->periodic_timer = timer_new_ns(rtc_clock, rtc_periodic_timer, s);
+ s->update_timer = timer_new_ns(rtc_clock, rtc_update_timer, s);
+ check_update_timer(s);
+
+ s->suspend_notifier.notify = rtc_notify_suspend;
+ qemu_register_suspend_notifier(&s->suspend_notifier);
+
+ /* two bytes of I/O space: index port at offset 0, data port at offset 1 */
+ memory_region_init_io(&s->io, OBJECT(s), &cmos_ops, s, "rtc", 2);
+ isa_register_ioport(isadev, &s->io, base);
+
+ /* register rtc 0x70 port for coalesced_pio */
+ memory_region_set_flush_coalesced(&s->io);
+ memory_region_init_io(&s->coalesced_io, OBJECT(s), &cmos_ops,
+ s, "rtc-index", 1);
+ memory_region_add_subregion(&s->io, 0, &s->coalesced_io);
+ memory_region_add_coalescing(&s->coalesced_io, 0, 1);
+
+ qdev_set_legacy_instance_id(dev, base, 3);
+ qemu_register_reset(rtc_reset, s);
+
+ /* read-only property: a getter is supplied, the setter argument is NULL */
+ object_property_add_tm(OBJECT(s), "date", rtc_get_date, NULL);
+
+ qdev_init_gpio_out(dev, &s->irq, 1);
++ QLIST_INSERT_HEAD(&rtc_devices, s, link);
+}
+
+/*
+ * Create and realize an MC146818 RTC on @bus.
+ *
+ * The "base_year" property is set to @base_year before the device is
+ * realized with qdev_init_nofail().  GPIO output 0 is connected to
+ * @intercept_irq when that is non-NULL, otherwise to ISA IRQ RTC_ISA_IRQ.
+ * The machine object gets an "rtc-time" alias for the device's "date"
+ * property.  Returns the new ISA device.
+ */
+ISADevice *mc146818_rtc_init(ISABus *bus, int base_year, qemu_irq intercept_irq)
+{
+ DeviceState *dev;
+ ISADevice *isadev;
- s = MC146818_RTC(isadev);
+
+ isadev = isa_create(bus, TYPE_MC146818_RTC);
+ dev = DEVICE(isadev);
- QLIST_INSERT_HEAD(&rtc_devices, s, link);
+ qdev_prop_set_int32(dev, "base_year", base_year);
+ qdev_init_nofail(dev);
+ if (intercept_irq) {
+ qdev_connect_gpio_out(dev, 0, intercept_irq);
+ } else {
+ isa_connect_gpio_out(isadev, 0, RTC_ISA_IRQ);
+ }
- object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(s),
+
- /* Reason: needs to be wired up by rtc_init() */
- dc->user_creatable = false;
++ object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(isadev),
+ "date", NULL);
+
+ return isadev;
+}
+
+/*
+ * qdev properties of the device: "base_year" (default 1980) and
+ * "lost_tick_policy" (default LOST_TICK_POLICY_DISCARD).
+ */
+static Property mc146818rtc_properties[] = {
+ DEFINE_PROP_INT32("base_year", RTCState, base_year, 1980),
+ DEFINE_PROP_LOSTTICKPOLICY("lost_tick_policy", RTCState,
+ lost_tick_policy, LOST_TICK_POLICY_DISCARD),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Device reset callback: clear the CMOS byte at index 0x0f unless it
+ * holds 0xfe.
+ *
+ * Reason: a VM that suspends itself will have 0xfe set there (guest
+ * suspend case), and that value has to survive the reset.
+ */
+static void rtc_resetdev(DeviceState *d)
+{
+    RTCState *rtc = MC146818_RTC(d);
+
+    if (rtc->cmos_data[0x0f] == 0xfe) {
+        return;
+    }
+    rtc->cmos_data[0x0f] = 0x00;
+}
+
+/*
+ * Class initialiser: install the realize and reset callbacks, the vmstate
+ * description and the property list on the device class.  @data is not
+ * used.
+ */
+static void rtc_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->props = mc146818rtc_properties;
+    device_class->vmsd = &vmstate_rtc;
+    device_class->reset = rtc_resetdev;
+    device_class->realize = rtc_realizefn;
+}
+
+/*
+ * QOM type description: TYPE_MC146818_RTC derives from TYPE_ISA_DEVICE,
+ * instances are RTCState-sized, and the class is set up by
+ * rtc_class_initfn().
+ */
+static const TypeInfo mc146818rtc_info = {
+ .name = TYPE_MC146818_RTC,
+ .parent = TYPE_ISA_DEVICE,
+ .instance_size = sizeof(RTCState),
+ .class_init = rtc_class_initfn,
+};
+
+/* Register mc146818rtc_info as a static type; hooked up via type_init(). */
+static void mc146818rtc_register_types(void)
+{
+ type_register_static(&mc146818rtc_info);
+}
+
+type_init(mc146818rtc_register_types)