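Unlike x86-64, AArch64 has no port I/O (PIO) instructions, so the guest kernel below writes its output to a memory-mapped I/O (MMIO) address instead.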

tiny_kernel.s

/*
 * Guest entry point: emits the bytes "Hello,world\n" one at a time through
 * output_char (character passed in w0), then parks the CPU.
 */
start:
    /* Hello, */
    mov w0, #0x48          // 'H'
    bl  output_char
    mov w0, #0x65          // 'e'
    bl  output_char
    mov w0, #0x6c          // 'l'
    bl  output_char
    mov w0, #0x6c          // 'l'
    bl  output_char
    mov w0, #0x6f          // 'o'
    bl  output_char
    mov w0, #0x2c          // ','
    bl  output_char

    /* world */
    mov w0, #0x77          // 'w'
    bl  output_char
    mov w0, #0x6f          // 'o'
    bl  output_char
    mov w0, #0x72          // 'r'
    bl  output_char
    mov w0, #0x6c          // 'l'
    bl  output_char
    mov w0, #0x64          // 'd'
    bl  output_char

    mov w0, #0x0a          // '\n'
    bl  output_char

    /*
     * Park the CPU. WFI is only a hint and is allowed to return, so branch
     * back to it. Without the loop, execution would fall through into
     * output_char below and, because x30 still holds the return address of
     * the last bl (the wfi itself), emit '\n' again on every wake-up.
     */
1:  wfi                    // Wait for interrupt
    b   1b                 // Woken up: go back to waiting

/*
 * output_char: store the low byte of w0 to the output address 0xf1000000.
 * Overwrites x1; returns to the caller via x30.
 */
output_char:
    mov  x1, #0xf1000000   // x1 = output port address
    strb w0, [x1]          // write one byte (low 8 bits of w0) to the port
    ret

tiny_qemu.c

kvm_fd = open(KVM_DEVICE, O_RDWR);
vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);

init = (struct kvm_vcpu_init){
    .target = QEMU_KVM_ARM_TARGET_NONE,
    .features = {0},
};
init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;

if (init.target == -1) {
    struct kvm_vcpu_init preferred;

    r = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred);
    init.target = preferred.target;
}
r = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);

struct kvm_device_attr attr = {
    .group = KVM_ARM_VCPU_PMU_V3_CTRL,
    .attr = KVM_ARM_VCPU_PMU_V3_INIT,
};
r = ioctl(vcpu_fd, KVM_HAS_DEVICE_ATTR, &attr);
r = ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);

guest_mem = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
mem = (struct kvm_userspace_memory_region){
    .slot = 0,
    .flags = 0,
    .guest_phys_addr = PHYS_MEM_START,
    .memory_size = MEM_SIZE,
    .userspace_addr = (unsigned long)guest_mem,
};
ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);

FILE *bin = fopen(TINY_KERNEL_FILE, "rb");
fread(guest_mem, 1, MEM_SIZE, bin);
fclose(bin);

struct kvm_reg_list dummy = { .n = 0, };
r = ioctl(vcpu_fd, KVM_GET_REG_LIST, &dummy);
struct kvm_reg_list *reg_list = malloc(sizeof(struct kvm_reg_list) +
                                       dummy.n * sizeof(__u64));
reg_list->n = dummy.n;
r = ioctl(vcpu_fd, KVM_GET_REG_LIST, reg_list);

__u64 pc = (__u64)PHYS_MEM_START;

struct kvm_one_reg reg = {
    .id = AARCH64_CORE_REG(regs.pc),
    .addr = (unsigned long)&pc,
};
r = ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);

while (1)
{
    r = ioctl(vcpu_fd, KVM_RUN, 0);
    run = mmap(NULL, sizeof(struct kvm_run), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
    switch (run->exit_reason) {
    case KVM_EXIT_MMIO:
        putchar(*(char*)run->mmio.data);
        break;
    }
}