Merge remote-tracking branch 'agraf/tags/signed-ppc-for-upstream-1.7' into staging

[qemu.git] / tcg / README
diff --git a/tcg/README b/tcg/README

index ec1ac79375aa8911497a24147ab45f4cadcd1a39..f1782123b753538359f89ed2cf8188844de46534 100644 (file)
--- a/tcg/README
+++ b/tcg/README
@@ -14,6 +14,10 @@ the emulated architecture. As TCG started as a generic C backend used
  for cross compiling, it is assumed that the TCG target is different
  from the host, although it is never the case for QEMU.
  
+In this document, we use "guest" to refer to the architecture we are
+emulating; "target" always means the TCG target, the machine on which
+we are running QEMU.
+
  A TCG "function" corresponds to a QEMU Translated Block (TB).
  
  A TCG "temporary" is a variable only live in a basic
@@ -361,7 +365,25 @@ Write 8, 16, 32 or 64 bits to host memory.
  All this opcodes assume that the pointed host memory doesn't correspond
  to a global. In the latter case the behaviour is unpredictable.
  
-********* 64-bit target on 32-bit host support
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except that the two unsigned inputs T1 and T2 yield the full
+double-word product T0.  The latter is returned in two single-word outputs.
+
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
+
+********* 64-bit guest on 32-bit host support
  
  The following opcodes are internal to TCG.  Thus they are to be implemented by
  32-bit host code generators, but are not to be emitted by guest translators.
@@ -372,18 +394,6 @@ They are emitted as needed by inline functions within "tcg-op.h".
  Similar to brcond, except that the 64-bit values T0 and T1
  are formed from two 32-bit arguments.
  
-* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-
-Similar to add/sub, except that the 64-bit inputs T1 and T2 are
-formed from two 32-bit arguments, and the 64-bit output T0
-is returned in two 32-bit outputs.
-
-* mulu2_i32 t0_low, t0_high, t1, t2
-
-Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
-the full 64-bit product T0.  The later is returned in two 32-bit outputs.
-
  * setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
  
  Similar to setcond, except that the 64-bit values T1 and T2 are
@@ -402,30 +412,25 @@ current TB was linked to this TB. Otherwise execute the next
  instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
  at most once with each slot index per TB.
  
-* qemu_ld8u t0, t1, flags
-qemu_ld8s t0, t1, flags
-qemu_ld16u t0, t1, flags
-qemu_ld16s t0, t1, flags
-qemu_ld32 t0, t1, flags
-qemu_ld32u t0, t1, flags
-qemu_ld32s t0, t1, flags
-qemu_ld64 t0, t1, flags
+* qemu_ld_i32/i64 t0, t1, flags, memidx
+* qemu_st_i32/i64 t0, t1, flags, memidx
+
+Load data at the guest address t1 into t0, or store data in t0 at guest
+address t1.  The _i32/_i64 size applies to the size of the input/output
+register t0 only.  The address t1 is always sized according to the guest,
+and the width of the memory operation is controlled by flags.
  
-Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address
-type. 'flags' contains the QEMU memory index (selects user or kernel access)
-for example.
+Both t0 and t1 may be split into little-endian ordered pairs of registers
+if dealing with 64-bit quantities on a 32-bit host.
  
-Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and
-"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits.
+The memidx selects the QEMU TLB index to use (e.g. user or kernel access).
+The flags are the TCGMemOp bits, selecting the sign, width, and endianness
+of the memory access.
  
-* qemu_st8 t0, t1, flags
-qemu_st16 t0, t1, flags
-qemu_st32 t0, t1, flags
-qemu_st64 t0, t1, flags
+For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+64-bit memory access specified in flags.
  
-Store the data t0 at the QEMU CPU Address t1. t1 has the QEMU CPU
-address type. 'flags' contains the QEMU memory index (selects user or
-kernel access) for example.
+*********
  
  Note 1: Some shortcuts are defined when the last operand is known to be
  a constant (e.g. addi for add, movi for mov).
@@ -515,9 +520,9 @@ register.
    a better generated code, but it reduces the memory usage of TCG and
    the speed of the translation.
  
-- Don't hesitate to use helpers for complicated or seldom used target
+- Don't hesitate to use helpers for complicated or seldom used guest
    instructions. There is little performance advantage in using TCG to
-  implement target instructions taking more than about twenty TCG
+  implement guest instructions taking more than about twenty TCG
    instructions. Note that this rule of thumb is more applicable to
    helpers doing complex logic or arithmetic, where the C compiler has
    scope to do a good job of optimisation; it is less relevant where
@@ -525,9 +530,9 @@ register.
    inline TCG may still be faster for longer sequences.
  
  - The hard limit on the number of TCG instructions you can generate
-  per target instruction is set by MAX_OP_PER_INSTR in exec-all.h --
+  per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h --
    you cannot exceed this without risking a buffer overrun.
  
  - Use the 'discard' instruction if you know that TCG won't be able to
    prove that a given global is "dead" at a given program point. The
-  x86 target uses it to improve the condition codes optimisation.
+  x86 guest uses it to improve the condition codes optimisation.