/*
 *  QEMU model of the Milkymist programmable FPU.
 *
 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Specification available at:
 *   http://www.milkymist.org/socdoc/pfpu.pdf
 *
 */
|  |  | 
#include "hw.h"
#include "sysbus.h"
#include "trace.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include <math.h>
#include <string.h>
|  |  | 
/*
 * Enable the define below to log every executed microcode instruction.
 * D_EXEC(x) expands to x when TRACE_EXEC is defined and to nothing otherwise.
 */
/* #define TRACE_EXEC */

#ifndef TRACE_EXEC
#    define D_EXEC(x)
#else
#    define D_EXEC(x) x
#endif
|  |  | 
/*
 * Control/status register indices into MilkymistPFPUState.regs[].
 * The MMIO handlers derive these from the byte offset shifted right by two.
 */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12    /* number of registers, not a register itself */
};
|  |  | 
/* Bits of the R_CTL register; writing START_BUSY kicks off a PFPU run. */
enum {
    CTL_START_BUSY = 1 << 0
};
|  |  | 
/* Microcode opcodes, as found in bits [10:7] of an instruction word. */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15
};
|  |  | 
/*
 * General purpose registers with a fixed role: X/Y receive the current mesh
 * position before each microcode run, FLAGS is the condition read by OP_IF.
 */
enum {
    GPR_X,          /* 0 */
    GPR_Y,          /* 1 */
    GPR_FLAGS       /* 2 */
};
|  |  | 
/*
 * Per-opcode result latency. The decoder places a result (latency - 1) slots
 * ahead in the output queue; MAX_LATENCY is the size of that queue.
 */
enum {
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,
    LATENCY_VECTOUT = 0,
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    MAX_LATENCY     = 7
};
|  |  | 
/*
 * Register-window layout, expressed as word indices (byte offset >> 2):
 * the general purpose registers and the currently mapped microcode page.
 */
#define GPR_BEGIN        0x100
#define GPR_END          0x17f
#define MICROCODE_BEGIN  0x200
#define MICROCODE_END    0x3ff

/* Total number of microcode words held by the device model. */
#define MICROCODE_WORDS  2048

/* Access the storage of lvalue `val` as if it had type `type`. */
#define REINTERPRET_CAST(type, val) (*(type *)&(val))
|  |  | 
#ifdef TRACE_EXEC
/* Mnemonics for the execution trace, indexed by the 4-bit opcode value. */
static const char *opcode_to_str[] = {
    "NOP",
    "FADD",
    "FSUB",
    "FMUL",
    "FABS",
    "F2I",
    "I2F",
    "VECTOUT",
    "SIN",
    "COS",
    "ABOVE",
    "EQUAL",
    "COPY",
    "IF",
    "TSIGN",
    "QUAKE",
};
#endif
|  |  | 
|  | struct MilkymistPFPUState { | 
|  | SysBusDevice busdev; | 
|  | MemoryRegion regs_region; | 
|  | CharDriverState *chr; | 
|  | qemu_irq irq; | 
|  |  | 
|  | uint32_t regs[R_MAX]; | 
|  | uint32_t gp_regs[128]; | 
|  | uint32_t microcode[MICROCODE_WORDS]; | 
|  |  | 
|  | int output_queue_pos; | 
|  | uint32_t output_queue[MAX_LATENCY]; | 
|  | }; | 
|  | typedef struct MilkymistPFPUState MilkymistPFPUState; | 
|  |  | 
|  | static inline hwaddr | 
|  | get_dma_address(uint32_t base, uint32_t x, uint32_t y) | 
|  | { | 
|  | return base + 8 * (128 * y + x); | 
|  | } | 
|  |  | 
|  | static inline void | 
|  | output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos) | 
|  | { | 
|  | s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val; | 
|  | } | 
|  |  | 
|  | static inline uint32_t | 
|  | output_queue_remove(MilkymistPFPUState *s) | 
|  | { | 
|  | return s->output_queue[s->output_queue_pos]; | 
|  | } | 
|  |  | 
|  | static inline void | 
|  | output_queue_advance(MilkymistPFPUState *s) | 
|  | { | 
|  | s->output_queue[s->output_queue_pos] = 0; | 
|  | s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY; | 
|  | } | 
|  |  | 
|  | static int pfpu_decode_insn(MilkymistPFPUState *s) | 
|  | { | 
|  | uint32_t pc = s->regs[R_PC]; | 
|  | uint32_t insn = s->microcode[pc]; | 
|  | uint32_t reg_a = (insn >> 18) & 0x7f; | 
|  | uint32_t reg_b = (insn >> 11) & 0x7f; | 
|  | uint32_t op = (insn >> 7) & 0xf; | 
|  | uint32_t reg_d = insn & 0x7f; | 
|  | uint32_t r = 0; | 
|  | int latency = 0; | 
|  |  | 
|  | switch (op) { | 
|  | case OP_NOP: | 
|  | break; | 
|  | case OP_FADD: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = a + b; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_FADD; | 
|  | D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_FSUB: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = a - b; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_FSUB; | 
|  | D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_FMUL: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = a * b; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_FMUL; | 
|  | D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_FABS: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float t = fabsf(a); | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_FABS; | 
|  | D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r)); | 
|  | } break; | 
|  | case OP_F2I: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | int32_t t = a; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_F2I; | 
|  | D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r)); | 
|  | } break; | 
|  | case OP_I2F: | 
|  | { | 
|  | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); | 
|  | float t = a; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_I2F; | 
|  | D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r)); | 
|  | } break; | 
|  | case OP_VECTOUT: | 
|  | { | 
|  | uint32_t a = cpu_to_be32(s->gp_regs[reg_a]); | 
|  | uint32_t b = cpu_to_be32(s->gp_regs[reg_b]); | 
|  | hwaddr dma_ptr = | 
|  | get_dma_address(s->regs[R_MESHBASE], | 
|  | s->gp_regs[GPR_X], s->gp_regs[GPR_Y]); | 
|  | cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4); | 
|  | cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4); | 
|  | s->regs[R_LASTDMA] = dma_ptr + 4; | 
|  | D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr)); | 
|  | trace_milkymist_pfpu_vectout(a, b, dma_ptr); | 
|  | } break; | 
|  | case OP_SIN: | 
|  | { | 
|  | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); | 
|  | float t = sinf(a * (1.0f / (M_PI * 4096.0f))); | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_SIN; | 
|  | D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r)); | 
|  | } break; | 
|  | case OP_COS: | 
|  | { | 
|  | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); | 
|  | float t = cosf(a * (1.0f / (M_PI * 4096.0f))); | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_COS; | 
|  | D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r)); | 
|  | } break; | 
|  | case OP_ABOVE: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = (a > b) ? 1.0f : 0.0f; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_ABOVE; | 
|  | D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_EQUAL: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = (a == b) ? 1.0f : 0.0f; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_EQUAL; | 
|  | D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_COPY: | 
|  | { | 
|  | r = s->gp_regs[reg_a]; | 
|  | latency = LATENCY_COPY; | 
|  | D_EXEC(qemu_log("COPY")); | 
|  | } break; | 
|  | case OP_IF: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | uint32_t f = s->gp_regs[GPR_FLAGS]; | 
|  | float t = (f != 0) ? a : b; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_IF; | 
|  | D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r)); | 
|  | } break; | 
|  | case OP_TSIGN: | 
|  | { | 
|  | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); | 
|  | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); | 
|  | float t = (b < 0) ? -a : a; | 
|  | r = REINTERPRET_CAST(uint32_t, t); | 
|  | latency = LATENCY_TSIGN; | 
|  | D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); | 
|  | } break; | 
|  | case OP_QUAKE: | 
|  | { | 
|  | uint32_t a = s->gp_regs[reg_a]; | 
|  | r = 0x5f3759df - (a >> 1); | 
|  | latency = LATENCY_QUAKE; | 
|  | D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r)); | 
|  | } break; | 
|  |  | 
|  | default: | 
|  | error_report("milkymist_pfpu: unknown opcode %d", op); | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (!reg_d) { | 
|  | D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n", | 
|  | s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, | 
|  | s->regs[R_PC] + latency)); | 
|  | } else { | 
|  | D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n", | 
|  | s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, | 
|  | s->regs[R_PC] + latency, reg_d)); | 
|  | } | 
|  |  | 
|  | if (op == OP_VECTOUT) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* store output for this cycle */ | 
|  | if (reg_d) { | 
|  | uint32_t val = output_queue_remove(s); | 
|  | D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val)); | 
|  | s->gp_regs[reg_d] = val; | 
|  | } | 
|  |  | 
|  | output_queue_advance(s); | 
|  |  | 
|  | /* store op output */ | 
|  | if (op != OP_NOP) { | 
|  | output_queue_insert(s, r, latency-1); | 
|  | } | 
|  |  | 
|  | /* advance PC */ | 
|  | s->regs[R_PC]++; | 
|  |  | 
|  | return 1; | 
|  | }; | 
|  |  | 
|  | static void pfpu_start(MilkymistPFPUState *s) | 
|  | { | 
|  | int x, y; | 
|  | int i; | 
|  |  | 
|  | for (y = 0; y <= s->regs[R_VMESHLAST]; y++) { | 
|  | for (x = 0; x <= s->regs[R_HMESHLAST]; x++) { | 
|  | D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y)); | 
|  |  | 
|  | /* set current position */ | 
|  | s->gp_regs[GPR_X] = x; | 
|  | s->gp_regs[GPR_Y] = y; | 
|  |  | 
|  | /* run microcode on this position */ | 
|  | i = 0; | 
|  | while (pfpu_decode_insn(s)) { | 
|  | /* decode at most MICROCODE_WORDS instructions */ | 
|  | if (i++ >= MICROCODE_WORDS) { | 
|  | error_report("milkymist_pfpu: too many instructions " | 
|  | "executed in microcode. No VECTOUT?"); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* reset pc for next run */ | 
|  | s->regs[R_PC] = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | s->regs[R_VERTICES] = x * y; | 
|  |  | 
|  | trace_milkymist_pfpu_pulse_irq(); | 
|  | qemu_irq_pulse(s->irq); | 
|  | } | 
|  |  | 
|  | static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr) | 
|  | { | 
|  | return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN; | 
|  | } | 
|  |  | 
|  | static uint64_t pfpu_read(void *opaque, hwaddr addr, | 
|  | unsigned size) | 
|  | { | 
|  | MilkymistPFPUState *s = opaque; | 
|  | uint32_t r = 0; | 
|  |  | 
|  | addr >>= 2; | 
|  | switch (addr) { | 
|  | case R_CTL: | 
|  | case R_MESHBASE: | 
|  | case R_HMESHLAST: | 
|  | case R_VMESHLAST: | 
|  | case R_CODEPAGE: | 
|  | case R_VERTICES: | 
|  | case R_COLLISIONS: | 
|  | case R_STRAYWRITES: | 
|  | case R_LASTDMA: | 
|  | case R_PC: | 
|  | case R_DREGBASE: | 
|  | case R_CODEBASE: | 
|  | r = s->regs[addr]; | 
|  | break; | 
|  | case GPR_BEGIN ... GPR_END: | 
|  | r = s->gp_regs[addr - GPR_BEGIN]; | 
|  | break; | 
|  | case MICROCODE_BEGIN ...  MICROCODE_END: | 
|  | r = s->microcode[get_microcode_address(s, addr)]; | 
|  | break; | 
|  |  | 
|  | default: | 
|  | error_report("milkymist_pfpu: read access to unknown register 0x" | 
|  | TARGET_FMT_plx, addr << 2); | 
|  | break; | 
|  | } | 
|  |  | 
|  | trace_milkymist_pfpu_memory_read(addr << 2, r); | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static void pfpu_write(void *opaque, hwaddr addr, uint64_t value, | 
|  | unsigned size) | 
|  | { | 
|  | MilkymistPFPUState *s = opaque; | 
|  |  | 
|  | trace_milkymist_pfpu_memory_write(addr, value); | 
|  |  | 
|  | addr >>= 2; | 
|  | switch (addr) { | 
|  | case R_CTL: | 
|  | if (value & CTL_START_BUSY) { | 
|  | pfpu_start(s); | 
|  | } | 
|  | break; | 
|  | case R_MESHBASE: | 
|  | case R_HMESHLAST: | 
|  | case R_VMESHLAST: | 
|  | case R_CODEPAGE: | 
|  | case R_VERTICES: | 
|  | case R_COLLISIONS: | 
|  | case R_STRAYWRITES: | 
|  | case R_LASTDMA: | 
|  | case R_PC: | 
|  | case R_DREGBASE: | 
|  | case R_CODEBASE: | 
|  | s->regs[addr] = value; | 
|  | break; | 
|  | case GPR_BEGIN ...  GPR_END: | 
|  | s->gp_regs[addr - GPR_BEGIN] = value; | 
|  | break; | 
|  | case MICROCODE_BEGIN ...  MICROCODE_END: | 
|  | s->microcode[get_microcode_address(s, addr)] = value; | 
|  | break; | 
|  |  | 
|  | default: | 
|  | error_report("milkymist_pfpu: write access to unknown register 0x" | 
|  | TARGET_FMT_plx, addr << 2); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | static const MemoryRegionOps pfpu_mmio_ops = { | 
|  | .read = pfpu_read, | 
|  | .write = pfpu_write, | 
|  | .valid = { | 
|  | .min_access_size = 4, | 
|  | .max_access_size = 4, | 
|  | }, | 
|  | .endianness = DEVICE_NATIVE_ENDIAN, | 
|  | }; | 
|  |  | 
|  | static void milkymist_pfpu_reset(DeviceState *d) | 
|  | { | 
|  | MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev); | 
|  | int i; | 
|  |  | 
|  | for (i = 0; i < R_MAX; i++) { | 
|  | s->regs[i] = 0; | 
|  | } | 
|  | for (i = 0; i < 128; i++) { | 
|  | s->gp_regs[i] = 0; | 
|  | } | 
|  | for (i = 0; i < MICROCODE_WORDS; i++) { | 
|  | s->microcode[i] = 0; | 
|  | } | 
|  | s->output_queue_pos = 0; | 
|  | for (i = 0; i < MAX_LATENCY; i++) { | 
|  | s->output_queue[i] = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | static int milkymist_pfpu_init(SysBusDevice *dev) | 
|  | { | 
|  | MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev); | 
|  |  | 
|  | sysbus_init_irq(dev, &s->irq); | 
|  |  | 
|  | memory_region_init_io(&s->regs_region, &pfpu_mmio_ops, s, | 
|  | "milkymist-pfpu", MICROCODE_END * 4); | 
|  | sysbus_init_mmio(dev, &s->regs_region); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static const VMStateDescription vmstate_milkymist_pfpu = { | 
|  | .name = "milkymist-pfpu", | 
|  | .version_id = 1, | 
|  | .minimum_version_id = 1, | 
|  | .minimum_version_id_old = 1, | 
|  | .fields      = (VMStateField[]) { | 
|  | VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX), | 
|  | VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128), | 
|  | VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS), | 
|  | VMSTATE_INT32(output_queue_pos, MilkymistPFPUState), | 
|  | VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY), | 
|  | VMSTATE_END_OF_LIST() | 
|  | } | 
|  | }; | 
|  |  | 
|  | static void milkymist_pfpu_class_init(ObjectClass *klass, void *data) | 
|  | { | 
|  | DeviceClass *dc = DEVICE_CLASS(klass); | 
|  | SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); | 
|  |  | 
|  | k->init = milkymist_pfpu_init; | 
|  | dc->reset = milkymist_pfpu_reset; | 
|  | dc->vmsd = &vmstate_milkymist_pfpu; | 
|  | } | 
|  |  | 
|  | static const TypeInfo milkymist_pfpu_info = { | 
|  | .name          = "milkymist-pfpu", | 
|  | .parent        = TYPE_SYS_BUS_DEVICE, | 
|  | .instance_size = sizeof(MilkymistPFPUState), | 
|  | .class_init    = milkymist_pfpu_class_init, | 
|  | }; | 
|  |  | 
|  | static void milkymist_pfpu_register_types(void) | 
|  | { | 
|  | type_register_static(&milkymist_pfpu_info); | 
|  | } | 
|  |  | 
|  | type_init(milkymist_pfpu_register_types) |