[gforth] / gforth / arch / amd64 / asm.fs  

gforth: gforth/arch/amd64/asm.fs

File: [gforth] / gforth / arch / amd64 / asm.fs (download)
Revision: 1.12, Thu Aug 12 21:14:33 2010 UTC (2 years, 9 months ago) by dvdkhlng
Branch: MAIN
Changes since 1.11: +17 -15 lines
Repaired x86 opcodes that should not/must not output REX prefixes

\         *** Assembler for the Athlon64 ***           17jul04py

\ Copyright (C) 1992-2000 by Bernd Paysan (486 assemlber)

\ Copyright (C) 2000,2001,2003,2004,2007 Free Software Foundation, Inc.

\ This file is part of Gforth.

\ Gforth is free software; you can redistribute it and/or
\ modify it under the terms of the GNU General Public License
\ as published by the Free Software Foundation, either version 3
\ of the License, or (at your option) any later version.

\ This program is distributed in the hope that it will be useful,
\ but WITHOUT ANY WARRANTY; without even the implied warranty of
\ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
\ GNU General Public License for more details.

\ You should have received a copy of the GNU General Public License
\ along with this program. If not, see http://www.gnu.org/licenses/.
\
\ The syntax is reverse polish. Source and destination are
\ reversed. Size prefixes are used instead of AX/EAX. Example:
\ Intel                           gives
\ mov  ax,bx                      .w bx ax mov
\ mov  eax,[ebx]                  .d bx ) ax mov
\ add  eax,4                      .d 4 # ax add
\ add  rax,8                      .q 8 # ax add
\ 
\ in .86 mode  .w is the default size, in .386 mode  .d is default
\ in .amd64 mode .q is default size.
\ .wa, .da, and .qa change address size. .b, .w(a) and .d(a) are not
\ switches like in my assem68k, they are prefixes.
\ [A-D][L|H] implicitely set the .b size. So
\ AH AL mov
\ generates a byte move. Sure you need .b for memory operations
\ like .b ax ) inc    which is  inc  BYTE PTR [eAX]

\ athlon64 Assembler Load Screen                       21apr00py

base @ get-current ALSO ASSEMBLER DEFINITIONS also

&8 base !

: [F]  Forth     ; immediate
: [A]  Assembler ; immediate

\ Assembler Forth words                                11mar00py

: user' ' >body @ ; immediate
: case? ( n1 n2 -- t / n1 f )
    over = IF  drop true  ELSE  false  THEN ;

\ Code generating primitives                           07mar93py

Variable >codes
: (+rel      ;
Create nrc  ' c, A, ' here A, ' allot A, ' c! A, ' (+rel A,

: nonrelocate   nrc >codes ! ;      nonrelocate

: >exec   Create  dup c,  cell+
            Does>  c@  >codes @  +  perform ;

0
>exec ,       >exec here    >exec allot   >exec c!
>exec +rel
drop

\ Stack-Buffer für Extra-Werte                         22dec93py

Variable ModR/M               Variable ModR/M#
Variable SIB                  Variable SIB#
Variable disp                 Variable disp#
Variable imm                  Variable imm#
Variable Aimm?                Variable Adisp?
Variable byte?                Variable seg
Variable .asize               Variable .anow
Variable .osize               Variable .onow
Variable .64size              Variable .64now
Variable .rex                 Variable .64bit
Variable .arel                Variable media
: pre-    seg off  .asize @ .anow !  .osize @ .onow !
  .rex off  .arel off  .64size @ .64now !  media off ;
: sclear  pre-  Aimm? off  Adisp? off
    ModR/M# off  SIB# off  disp# off  imm# off  byte? off ;

: .b  1 byte? !  imm# @ 1 min imm# ! ;

: .w   .onow off .64now off ;   : .wa  .anow off ;
: .d   .onow on .64now off ;    : .da  .anow on ;
: .q   .64now on ;              : .qa  .anow off ;
: -rex  .64now off ;            : -size  .osize @ .onow ! ;

\ Extra-Werte compilieren                              01may95py
: bytes,  ( nr x n -- )
    0 ?DO  over 0< IF  +rel  swap 1+ swap  THEN  dup ,  $8 rshift
    LOOP   2drop ;
: rbytes, ( nr x n -- )
    >r here r@ + - r> bytes, ;
: opcode, ( opcode -- )
    .asize @ .anow @  <> IF  $67 ,  THEN
    media  @ IF media @ ,
    ELSE .osize @ .onow @  <> IF  $66 ,  THEN  THEN
    seg    @ IF  seg @ ,  THEN
    .64now @ IF  .rex @ $08 or .rex !  THEN
    .rex   @ IF  .rex @ $F and $40 or ,  THEN
    ,  pre- ;
: finish ( opcode -- )  opcode,
    ModR/M# @ IF  ModR/M @ ,  THEN
    SIB#    @ IF  SIB    @ ,  THEN
    Adisp?  @ disp @ disp# @ .arel @ IF  rbytes,  ELSE  bytes,  THEN
    Aimm?   @ imm  @ imm#  @ bytes,    sclear  ;
: finishb  ( opcode -- )       byte? @ xor  finish ;
: 0F,  $0F opcode, ;

\ Possible bug: '.64now off' does not have any effect _after_ 0F,
: finish0F ( opcode -- )       0F, .64now off finish ;

\ Register                                             29mar94py

: Regs  ( mod n -- ) FOR  dup Constant 11 +  NEXT  drop ;
: breg  ( reg -- )  Create c,  DOES> c@  .b ;
: bregs ( mod n -- ) FOR  dup breg     11 +  NEXT  drop ;
: wadr: ( reg -- )  Create c,  DOES> c@  .wa ;
: wadr  ( mod n -- ) FOR  dup wadr:    11 +  NEXT  drop ;
      0 7 wadr [BX+SI] [BX+DI] [BP+SI] [BP+DI] [SI] [DI] [BP] [BX]
    300 7 regs  AX CX DX BX SP BP SI DI
0200300 7 regs  R8 R9 R10 R11 R12 R13 R14 R15
    300 7 bregs AL CL DL BL AH CH DH BH
2000300 3 bregs SPL BPL SIL DIL
0200300 7 bregs  R8L R9L R10L R11L R12L R13L R14L R15L
  2300 5 regs ES CS SS DS FS GS
' SI alias RP   ' BP alias UP   ' DI Alias OP
: .386  .64size off .64bit off .asize on   .osize on  sclear ;
: .86   .64size off .64bit off .asize off  .osize off sclear ;
: .amd64  .386  .64size on  .asize off  .64bit on  sclear ;  .amd64
: asize@  2 .anow @ IF  2*  THEN  .64bit @ IF  drop 4  THEN ;
: osize@  2 .onow @ IF  2*  THEN  ;

\ Address modes                                        01may95py
: index  ( breg1 ireg2 -- ireg )
    dup $10000 and >r 370 and swap dup $10000 and >r 7 and or
    SIB ! 1 SIB# ! r> r> 2* or 44 or ;
: #) ( disp -- reg )
    .64bit @ IF  disp !  44 55 index 4 disp# !  EXIT  THEN
    disp ! .anow @ IF  55 4  ELSE  66 2  THEN  disp# ! ;
: R#) ( disp -- reg )
    .64bit @ 0= abort" RIP address only in 64 bit mode"
    disp ! 4 disp# ! .arel on  55 ;
: *2   100 xor ;    : *4   200 xor ;    : *8   300 xor ;
: I) ( reg1 reg2 -- ireg )  .anow @ 0= abort" No Index!"
  *8  index ;
: I#) ( disp32 reg -- ireg ) BP swap I) swap #) drop ;
: seg)  ( seg disp -- -1 )
  disp !  asize@ disp# !  imm ! 2 imm# !  -1 ;
: )  ( reg -- reg )  dup SP = IF dup I) ELSE 200077 and THEN ;
: D) ( disp reg -- reg )  ) >r dup disp !  $80 -$80 within
  Adisp? @ or IF  200 asize@  ELSE  100 1  THEN disp# ! r> or ;
: DI) ( disp reg1 reg2 -- ireg )  I) D) ;
: A: ( -- )  Adisp? on ;        : A::  ( -- )  -2 Adisp? ! ;
: A#) ( imm -- )  A: #) ;       : Aseg) ( * -- ) A: seg) ;

\ # A# rel) CR DR TR ST <ST STP                        01jan98py
: # ( imm -- ) dup imm !  -$80 $80 within  byte? @ or
  IF  1  ELSE  osize@  THEN  imm# ! ;
: L#  ( imm -- )  imm !  osize@ imm# ! ;
: A#  ( imm -- )  Aimm? on  L# ;
: rel)  ( addr -- -2 )  disp ! asize@ disp# ! -2 ;
: L) ( disp reg -- reg ) ) >r disp ! 200 asize@ disp# ! r> or ;
: LI) ( disp reg1 reg2 -- reg ) I) L) ;
: >>mod ( reg1 reg2 -- mod )
   \ bug?  This can only add $10 to .rex.  However only bits 0..3 of .rex are
   \ used
    2dup or $80000 and $0F rshift .rex +!  
    dup $10000 and $0E rshift .rex +! 70 and swap
    dup $30000 and $10 rshift .rex +! 307 and or ;
: >reg ( reg -- reg' )  dup $10000 and $10 rshift .rex +! 7 and ;
: >mod ( reg1 reg2 -- )  >>mod modR/M !  1 modR/M# ! ;
: CR  ( n -- )  7 and 11 *  $1C0 or ;    0 CR constant CR0
: DR  ( n -- )  7 and 11 *  $2C0 or ;
: TR  ( n -- )  7 and 11 *  $3C0 or ;
: ST  ( n -- )  7 and       $5C0 or ;
: <ST ( n -- )  7 and       $7C0 or ;
: STP ( n -- )  7 and       $8C0 or ;

\ reg?                                                 10apr93py
: reg= ( reg flag mask -- flag ) 2 pick and = ;
: reg? ( reg -- reg flag )  $C0 $FFC0 reg= ;
: ?reg ( reg -- reg )  reg? 0= abort" reg expected!" ;
: ?mem ( mem -- mem )  dup $C0 < 0= abort" mem expected!" ;
: ?ax  ( reg -- reg )  dup AX <> abort" ax/al expected!" ;
: cr?  ( reg -- reg flag ) $100 $FF00 reg= ;
: dr?  ( reg -- reg flag ) $200 $FF00 reg= ;
: tr?  ( reg -- reg flag ) $300 $FF00 reg= ;
: sr?  ( reg -- reg flag ) $400 $FF00 reg= ;
: st?  ( reg -- reg flag ) dup $8 rshift 5 - ;
: ?st  ( reg -- reg ) st? 0< abort" st expected!" ;
: xr?  ( reg -- reg flag ) dup $FF > ;
: ?xr  ( reg -- reg )  xr? 0= abort" xr expected!" ;
: rel? ( reg -- reg flag ) dup -2 = ;
: seg? ( reg -- reg flag ) dup -1 = ;

\ Single Byte instruction                              27mar94py

: bc:   ( opcode -- )  Create c, DOES> c@ ,        ;
: bc.b: ( opcode -- )  Create c, DOES> c@ finishb  ;
: bc0F: ( opcode -- )  Create c, DOES> c@ finish0F ;

: seg:  ( opcode -- )  Create c, DOES> c@ seg ! ;

$26 seg: ES:    $2E seg: CS:    $36 seg: SS:    $3E seg: DS:
$64 seg: FS:    $65 seg: GS:

Forth

\ arithmetics                                          07nov92py

: reg>mod ( reg1 reg2 -- 1 / 3 )
    reg? IF  >mod 3  ELSE  swap ?reg >mod 1  THEN  ;
: ari: ( n -- ) Create c,
    DOES> ( reg1 reg2 / reg -- )  c@ >r imm# @
    IF    imm# @ byte? @ + 1 > over AX = and
          IF    drop $05 r> 70 and or
          ELSE  r> >mod $81 imm# @ 1 byte? @ + = IF 2 + THEN
          THEN
    ELSE  reg>mod  r> 70 and or
    THEN  finishb  ;

00 ari: add     11 ari: or      22 ari: adc     33 ari: sbb
44 ari: and     55 ari: sub     66 ari: xor     77 ari: cmp

\ bit shifts    strings                                07nov92py

: shift: ( n -- )  Create c,
    DOES> ( r/m -- )  c@ >mod  imm# @
    IF    imm @ 1 =
          IF  $D1 0  ELSE  $C1 1  THEN   imm# !
    ELSE  $D3
    THEN  finishb ;

00 shift: rol   11 shift: ror   22 shift: rcl   33 shift: rcr
44 shift: shl   55 shift: shr   66 shift: sal   77 shift: sar

$6D bc.b: ins   $6F bc.b: outs
$A5 bc.b: movs  $A7 bc.b: cmps
$AB bc.b: stos  $AD bc.b: lods  $AF bc.b: scas

\ movxr                                                07feb93py

: xr>mod  ( reg1 reg2 -- 0 / 2 )
    xr?  IF  >mod  2  ELSE  swap ?xr >mod  0  THEN  ;

: movxr  ( reg1 reg2 -- )
    -rex   2dup or sr? nip 
    IF    xr>mod  $8C or finish
    ELSE  2dup or $8 rshift 1+ -3 and >r  xr>mod  r> $20 or or finish0F
    THEN  ;

\ mov                                                  23jan93py

: assign#  byte? @ 0= IF  osize@ imm# !  ELSE 1 imm# ! THEN ;
: ?64off  .64bit @ .anow @ 0= and IF  10 disp# ! THEN  0 sib# ! ;
: ?ofax ( reg ax -- flag )
    .64bit @ IF  44  ELSE  .anow @ IF 55 ELSE 66 THEN  THEN AX d= ;
: mov ( r/m reg / reg r/m / reg -- )  2dup or 0> imm# @ and
  IF    assign#  reg?
        IF    >reg  $B8 or byte? @ 3 lshift xor  byte? off
	      .64now @ IF  10 imm# !  THEN
        ELSE  0 >mod  $C7  THEN
  ELSE  2dup or $FFFF and $FF > IF  movxr exit  THEN
        2dup ?ofax
        IF  2drop $A1  ?64off  ELSE  2dup swap  ?ofax
            IF  2drop $A3  ?64off  ELSE  reg>mod $88 or  THEN
        THEN
  THEN  finishb ;

\ not neg mul (imul div idiv                           29mar94py

: modf  ( r/m reg opcode -- )  -rot >mod finish   ;
: modfb ( r/m reg opcode -- )  -rot >mod finishb  ;
: mod0F ( r/m reg opcode -- )  -rot >mod finish0F ;
: modf:  Create  c,  DOES>  c@ modf ;
: not: ( mode -- )  Create c, DOES> ( r/m -- ) c@ $F7 modfb ;

00 not: test#                 22 not: NOT     33 not: NEG
44 not: MUL     55 not: (IMUL 66 not: DIV     77 not: IDIV

: inc: ( mode -- )  Create c,
  DOES>  ( r/m -- )
    c@ >r reg?  byte? @ 0=  and .64bit @ 0= and
    IF    107 and r> 70 and or finish
    ELSE  r> $FF modfb   THEN ;
00 inc: INC     11 inc: DEC

\ test shld shrd                                       07feb93py

: test  ( reg1 reg2 / reg -- )  imm# @
  IF    assign#  AX case?
        IF  $A9  ELSE  test#  exit  THEN
  ELSE  ?reg >mod  $85  THEN  finishb ;

: shd ( r/m reg opcode -- )
    imm# @ IF  1 imm# ! 1-  THEN  mod0F ;
: shld  swap 245 shd ;          : shrd  swap 255 shd ;

: btx: ( r/m reg/# code -- )  Create c,
    DOES> c@ >r imm# @
    IF    1 imm# !  r> $BA
    ELSE  swap 203 r> >>mod  THEN  mod0F ;
44 btx: bt      55 btx: bts     66 btx: btr     77 btx: btc

\ push pop                                             05jun92py

: pushs   swap  FS case?  IF  $A0 or finish0F exit  THEN
                  GS case?  IF  $A8 or finish0F exit  THEN
    30 and 6 or or finish ;

: push  ( reg -- )
  -rex
  imm# @ 1 = IF  $6A finish exit  THEN
  imm# @     IF  $68 finish exit  THEN
  reg?       IF  >reg $50 or finish exit  THEN
  sr?        IF  0 pushs  exit  THEN
  66 $FF modf ;
: pop   ( reg -- )
  -rex
  reg?       IF  >reg $58 or finish exit  THEN
  sr?        IF  1 pushs  exit  THEN
  06 $8F modf ;

\ Ascii Arithmetics                                    22may93py

$27 bc: DAA     $2F bc: DAS     $37 bc: AAA     $3F bc: AAS

: aa:  Create c,
    DOES> ( -- ) c@
    imm# @ 0= IF  &10 imm !  THEN  1 imm# ! finish ;
$D4 aa: AAM     $D5 aa: AAD     $D6 bc: SALC    $D7 bc: XLAT

$60 bc: PUSHA   $61 bc: POPA
$90 bc: NOP
$98 bc: CBW     $99 bc: CWD                     $9B bc: FWAIT
$9C bc: PUSHF   $9D bc: POPF    $9E bc: SAHF    $9F bc: LAHF
                $C9 bc: LEAVE
$CC bc: INT3                    $CE bc: INTO    $CF bc: IRET
' fwait Alias wait

\ one byte opcodes                                     25dec92py

$F0 bc: LOCK                    $F2 bc: REP     $F3 bc: REPE
$F4 bc: HLT     $F5 bc: CMC
$F8 bc: CLC     $F9 bc: STC     $FA bc: CLI     $FB bc: STI
$FC bc: CLD     $FD bc: STD

: ?brange ( offword --- offbyte )  dup $80 -$80 within
    IF ." branch offset out of 1-byte range" THEN ;
: sb: ( opcode -- )  Create c,
    DOES> ( addr -- ) >r  [A] here [F] 2 + - ?brange
    disp !  1 disp# !  r> c@ -rex finish ;
$E0 sb: LOOPNE  $E1 sb: LOOPE   $E2 sb: LOOP    $E3 sb: JCXZ
: (ret ( op -- )  imm# @  IF  2 imm# !  1-  THEN  -rex finish ;
: ret  ( -- )  $C3  (ret ;
: retf ( -- )  $CB  (ret ;

\ call jmp                                             22dec93py

: call  ( reg / disp -- ) rel?
  IF  drop $E8 disp @ [A] here [F] 1+ asize@ + - disp ! finish
      exit  THEN  22 $FF -rex modf ;
: callf ( reg / seg -- )
  seg? IF  drop $9A  finish exit  THEN  33 $FF -rex modf ;

: jmp   ( reg / disp -- )
  -rex
  rel? IF  drop disp @ [A] here [F] 2 + - dup -$80 $80 within
           IF    disp ! 1 disp# !  $EB
           ELSE  3 - disp ! $E9  THEN  finish exit  THEN
  44 $FF modf ;
: jmpf  ( reg / seg -- )
  seg? IF  drop $EA  finish exit  THEN  55 $FF -rex modf ;

: next .d ['] noop >code-address rel) jmp ;

\ jump if                                              22dec93py

: cond: 0 DO  i Constant  LOOP ;

$10 cond: vs vc   u< u>=  0= 0<>  u<= u>   0< 0>=  ps pc   <  >=   <=  >
$10 cond: o  no   b  nb   z  nz   be  nbe  s  ns   pe po   l  nl   le  nle
: jmpIF  ( addr cond -- )
  swap [A] here [F] 2 + - dup -$80 $80 within
  IF            disp ! $70 1
  ELSE  0F,  4 - disp ! $80 4  THEN  disp# ! or  -rex finish ;
: jmp:  Create c,  DOES> c@ jmpIF ;
: jmps  0 DO  i jmp:  LOOP ;
$10 jmps jo  jno   jb  jnb   jz  jnz   jbe  jnbe  js  jns   jpe jpo   jl  jnl   jle  jnle

\ xchg                                                 22dec93py

: setIF ( r/m cond -- ) 0 swap $90 or mod0F ;
: set: ( cond -- )  Create c,  DOES>  c@ setIF ;
: sets: ( n -- )  0 DO  I set:  LOOP ;
$10 sets: seto setno  setb  setnb  sete setne  setna seta  sets setns  setpe setpo  setl setge  setle setg
: xchg ( r/m reg / reg r/m -- )
  over AX = IF  swap  THEN  reg?  0= IF  swap  THEN  ?reg
  byte? @ 0=  IF AX case?
  IF reg? IF >reg $90 or finish exit THEN  AX  THEN THEN
  $87 modfb ;

: movx ( r/m reg opcode -- ) 0F, modfb ;
: movsx ( r/m reg -- )  $BF movx ;
: movzx ( r/m reg -- )  $B7 movx ;

\ misc                                                 16nov97py

: ENTER ( imm8 -- ) 2 imm# ! $C8 -rex finish [A] , [F] ;
: ARPL ( reg r/m -- )  swap $63 modf ;
$62 modf: BOUND ( mem reg -- )

: mod0F:  Create c,  DOES> c@ mod0F ;
$BC mod0F: BSF ( r/m reg -- )   $BD mod0F: BSR ( r/m reg -- )

$06 bc0F: CLTS
$08 bc0F: INVD  $09 bc0F: WBINVD

: CMPXCHG ( reg r/m -- ) swap $A7 movx ;
: CMPXCHG8B ( r/m -- )   $8 $C7 movx ;
: BSWAP ( reg -- )       >reg $C8 or finish0F ;
: XADD ( r/m reg -- )    $C1 movx ;

\ misc                                                 20may93py

: IMUL ( r/m reg -- )  imm# @ 0=
  IF  dup AX =  IF  drop (IMUL exit  THEN
      $AF mod0F exit  THEN
  >mod imm# @ 1 = IF  $6B  ELSE  $69  THEN  finish ;
: io ( oc -- )  imm# @ IF  1 imm# !  ELSE  $8 +  THEN finishb ;
: IN  ( -- ) $E5 io ;
: OUT ( -- ) $E7 io ;
: INT ( -- ) 1 imm# ! $CD finish ;
: 0F.0: ( r/m -- ) Create c, DOES> c@ $00 -rex mod0F ;
00 0F.0: SLDT   11 0F.0: STR    22 0F.0: LLDT   33 0F.0: LTR
44 0F.0: VERR   55 0F.0: VERW
: 0F.1: ( r/m -- ) Create c, DOES> c@ $01 -rex mod0F ;
00 0F.1: SGDT   11 0F.1: SIDT   22 0F.1: LGDT   33 0F.1: LIDT
44 0F.1: SMSW                   66 0F.1: LMSW   77 0F.1: INVLPG

\ misc                                                 29mar94py

$02 mod0F: LAR ( r/m reg -- )
$8D modf:  LEA ( m reg -- )
$C4 modf:  LES ( m reg -- )
$C5 modf:  LDS ( m reg -- )
$B2 mod0F: LSS ( m reg -- )
$B4 mod0F: LFS ( m reg -- )
$B5 mod0F: LGS ( m reg -- )
\ Pentium/AMD K5 codes
: cpuid ( -- )  0F, $A2 [A] , [F] ;
: cmpchx8b ( m -- ) 0 $C7 mod0F ;
: rdtsc ( -- )  0F, $31 [A] , [F] ;
: rdmsr ( -- )  0F, $32 [A] , [F] ;
: wrmsr ( -- )  0F, $30 [A] , [F] ;
: rsm ( -- )  0F, $AA [A] , [F] ;

\ Floating point instructions                          22dec93py

$D8 bc: D8,   $D9 bc: D9,   $DA bc: DA,   $DB bc: DB,
$DC bc: DC,   $DD bc: DD,   $DE bc: DE,   $DF bc: DF,

: D9: Create c, DOES> D9, c@ finish ;

Variable fsize
: .fs   0 fsize ! ;  : .fl   4 fsize ! ;  : .fx   3 fsize ! ;
: .fw   6 fsize ! ;  : .fd   2 fsize ! ;  : .fq   7 fsize ! ;
.fx
: fop:  Create c,  DOES>  ( fr/m -- ) c@ >r
    st? dup 0< 0= IF  swap r> >mod 2* $D8 + finish exit  THEN
    drop ?mem r> >mod $D8 fsize @ dup 1 and dup 2* + - +
    finish ;
: f@!: Create c,  DOES>  ( fm -- ) c@ $D9 modf ;

\ Floating point instructions                          08jun92py

$D0 D9: FNOP

$E0 D9: FCHS    $E1 D9: FABS
$E4 D9: FTST    $E5 D9: FXAM
$E8 D9: FLD1    $E9 D9: FLDL2T  $EA D9: FLDL2E  $EB D9: FLDPI
$EC D9: FLDLG2  $ED D9: FLDLN2  $EE D9: FLDZ
$F0 D9: F2XM1   $F1 D9: FYL2X   $F2 D9: FPTAN   $F3 D9: FPATAN
$F4 D9: FXTRACT $F5 D9: FPREM1  $F6 D9: FDECSTP $F7 D9: FINCSTP
$F8 D9: FPREM   $F9 D9: FYL2XP1 $FA D9: FSQRT   $FB D9: FSINCOS
$FC D9: FRNDINT $FD D9: FSCALE  $FE D9: FSIN    $FF D9: FCOS

\ Floating point instructions                          23jan94py

00 fop: FADD    11 fop: FMUL    22 fop: FCOM    33 fop: FCOMP
44 fop: FSUB    55 fop: FSUBR   66 fop: FDIV    77 fop: FDIVR

: FCOMPP ( -- )  [A] 1 stp fcomp [F] ;
: FBLD   ( fm -- ) 44 $D8 modf ;
: FBSTP  ( fm -- ) 66 $DF modf ;
: FFREE  ( st -- ) 00 $DD modf ;
: FSAVE  ( fm -- ) 66 $DD modf ;
: FRSTOR ( fm -- ) 44 $DD modf ;
: FINIT  ( -- )  [A] DB, $E3 , [F] ;
: FXCH   ( st -- ) 11 $D9 modf ;

44 f@!: FLDENV  55 f@!: FLDCW   66 f@!: FSTENV  77 f@!: FSTCW

\ fild fst fstsw fucom                                 22may93py
: FUCOM ( st -- )  ?st st? IF 77 ELSE 66 THEN $DD modf ;
: FUCOMPP ( -- )  [A] DA, $E9 , [F] ;
: FNCLEX  ( -- )  [A] DB, $E2 , [F] ;
: FCLEX   ( -- )  [A] fwait fnclex [F] ;
: FSTSW ( r/m -- )
  dup AX = IF  44  ELSE  ?mem 77  THEN  $DF modf ;
: f@!,  fsize @ 1 and IF  drop  ELSE  nip  THEN
    fsize @ $D9 or modf ;
: fx@!, ( mem/st l x -- )  rot  st? 0=
    IF  swap $DD modf drop exit  THEN  ?mem -rot
    fsize @ 3 = IF drop $DB modf exit THEN  f@!, ;
: FST  ( st/m -- ) st?  0=
  IF  22 $DD modf exit  THEN  ?mem 77 22 f@!, ;
: FLD  ( st/m -- )  st? 0= IF 0 $D9 modf exit THEN 55 0 fx@!, ;
: FSTP ( st/m -- )  77 33 fx@!, ;

\ PPro instructions                                    28feb97py


: cmovIF ( r/m r flag -- )  $40 or mod0F ;
: cmov:  Create c, DOES> c@ cmovIF ;
: cmovs:  0 DO  I cmov:  LOOP ;
$10 cmovs: cmovo  cmovno   cmovb   cmovnb   cmovz  cmovnz   cmovbe  cmovnbe   cmovs  cmovns   cmovpe  cmovpo   cmovl  cmovnl   cmovle  cmovnle

\ MMX opcodes                                          02mar97py

    300 7 regs MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7

: mmxs ?DO  I mod0F:  LOOP ;
$64 $60 mmxs PUNPCKLBW PUNPCKLWD PUNOCKLDQ PACKUSDW
$68 $64 mmxs PCMPGTB   PCMPGTW   PCMPGTD   PACKSSWB
$6C $68 mmxs PUNPCKHBW PUNPCKHWD PUNPCKHDQ PACKSSDW
$78 $74 mmxs PCMPEQB   PCMPEQW   PCMPEQD   EMMS
$DA $D8 mmxs PSUBUSB   PSUBUSW
$EA $E8 mmxs PSUBSB    PSUBSW
$FB $F8 mmxs PSUBB     PSUBW     PSUBD
$DE $DC mmxs PADDUSB   PADDUSW
$EE $EC mmxs PADDSB    PADDSW
$FF $FC mmxs PADDB     PADDW     PADDD

\ MMX opcodes                                          02mar97py

$D5 mod0F: pmullw               $E5 mod0F: pmulhw
$F5 mod0F: pmaddwd
$DB mod0F: pand                 $DF mod0F: pandn
$EB mod0F: por                  $EF mod0F: pxor
: pshift ( mmx imm/m mod op -- )
  imm# @ IF  1 imm# !  ELSE  + $50 +  THEN  mod0F ;
: PSRLW ( mmx imm/m -- )  020 $71 pshift ;
: PSRLD ( mmx imm/m -- )  020 $72 pshift ;
: PSRLQ ( mmx imm/m -- )  020 $73 pshift ;
: PSRAW ( mmx imm/m -- )  040 $71 pshift ;
: PSRAD ( mmx imm/m -- )  040 $72 pshift ;
: PSLLW ( mmx imm/m -- )  060 $71 pshift ;
: PSLLD ( mmx imm/m -- )  060 $72 pshift ;
: PSLLQ ( mmx imm/m -- )  060 $73 pshift ;

\ MMX opcodes                                         27jun99beu

\ mmxreg --> mmxreg move
\ (dk)this misses the reg->mem move, redefined in the SSE part below
\ $6F mod0F: MOVQ   

\ memory/reg32 --> mmxreg load
$6F mod0F: PLDQ  \ Intel: MOVQ mm,m64
$6E mod0F: PLDD  \ Intel: MOVD mm,m32/r

\ mmxreg --> memory/reg32
: PSTQ ( mm m64   -- ) SWAP  $7F mod0F ; \ Intel: MOVQ m64,mm
: PSTD ( mm m32/r -- ) SWAP  $7E mod0F ; \ Intel: MOVD m32/r,mm

\ 3Dnow! opcodes (K6)                                  21apr00py
: ib!  ( code -- )  [A] # [F]  1 imm# ! ;
: mod0F# ( code imm -- )  ib! mod0F ;
: 3Dnow: ( imm -- )  Create c,  DOES>  -rex $0f swap c@ mod0F# ;
$0D 3Dnow: PI2FD                $1D 3Dnow: PF2ID
$90 3Dnow: PFCMPGE              $A0 3Dnow: PFCMPGT
$94 3Dnow: PFMIN                $A4 3Dnow: PFMAX
$96 3Dnow: PFRCP                $A6 3Dnow: PFRCPIT1
$97 3Dnow: PFRSQRT              $A7 3Dnow: PFRSQIT1
$9A 3Dnow: PFSUB                $AA 3Dnow: PFSUBR
$9E 3Dnow: PFADD                $AE 3Dnow: PFACC
$B0 3Dnow: PFCMPEQ              $B4 3Dnow: PFMUL
$B6 3Dnow: PFRCPIT2             $B7 3Dnow: PMULHRW
$BF 3Dnow: PAVGUSB

: FEMMS  -rex $0E finish0F ;
: PREFETCH  -rex 000 $0D mod0F ;    : PREFETCHW  -rex 010 $0D mod0F ;

\ SSE opcodes (Athlon64)
    300 7 regs XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7
0200300 7 regs XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15

: finishxx0f  ( $xx0fyy -- )  \ xx 0f yy opcodes  (xx=66,f2,f3 or 00 if none)
   dup $10 rshift media !  -size  $ff and finish0F ;
: modxx0f  -rot >mod finishxx0f ;

: movxx:  ( opcode1-regdst opcode2-memdst -- )  create ,  ,
   does>  ( xmm1 xmm2/mem | xmm1/mem xmm2   a-addr -- ) 
   >r reg>mod r>  swap 3 = if cell+ then  @
   -rex finishxx0f ;

$0f10   $0f11   movxx: movups   $660f10 $660f11 movxx: movupd
$0f12   $0f13   movxx: movlps   $660f12 $660f13 movxx: movlpd
$0f16   $0f17   movxx: movhps   $660f16 $660f17 movxx: movhpd
$0f28   $0f29   movxx: movaps   $660f28 $660f29 movxx: movapd
$f30f10 $f30f11 movxx: movss    $f20f10 $f20f11 movxx: movsd   

\ todo: how do we switch all the other MMX/SSE bastards between the two modes?
\ obviously that requires only inserting a $66 prefix
\ ugly solution: set .osize flag when XMM is used!?
$0f6f   $0f7f   movxx: movq-mmx
$f30f7e $660fd6 movxx: movq

\ todo: can be used with .d/.q prefix (i.e. 64-bit mov with REX, else 32-bit
\ mov)  So now we have REX and $66 prefix!
$0f6e   $0f7e   movxx: movd-mmx
$660f6e $660f7e movxx: movd

\ SSE arithmetic
\ todo: REX prefixes seem to come in between Fx and 0F opcode bytes.
\ ouch.  need to touch opcode,?
: sse:  ( opc1 rex? "name" -- ) create swap , c,
   does> ( xmm1 xmm2/mem a-addr -- )
   dup @  swap cell+ c@ 0= if ( suppress rex) .d then
   modxx0f ;
: sses  ( prefixN..prefix1 opc n "name1" ... "nameN"  -- )
   0 do   swap $10 lshift over or  0 sse:  loop  drop ;
: sse2xa  ( opc "name1" "name2-66"  -- )   $66 $00 rot 2 sses ;
: sse2xb  ( opc "name1-f3" "name2-f2"  -- )   $f2 $f3 rot 2 sses ;
: sse2xc  ( opc "name1-f3" "name2-f2"  -- )  $66 $f2 rot 2 sses ;
: sse4x  ( opc "name1" "name4"  -- )   dup sse2xa sse2xb ;

$0f58 sse4x addps addpd addss addsd
$0f5c sse4x subps subpd subss subsd  
$0f5f sse4x maxps maxpd maxss maxsd
$0f5d sse4x minps minpd minss minsd
$0f59 sse4x mulps mulpd mulss mulsd
$0f5e sse4x divps divpd divss divsd
$0f54 sse2xa andps andpd
$0f55 sse2xa andnps andnpd
$0f56 sse2xa orps orpd
$0f57 sse2xa xorps xorpd
$0f2e sse2xa ucomiss ucomisd
$0f2f sse2xa comiss comisd
$0f5b sse2xa cvtdq2ps cvtps2dq   $f30f5b 0 sse: cvttps2dq 
$0fe6 sse2xb cvtdq2pd cvtpd2dq   $660f5b 0 sse: cvttpd2dq 
$0f2d sse2xa cvtps2pi cvtpd2pi   
$0f2a sse2xa cvtpi2ps cvtpi2pd   
( these take .d/.q size prefix:)
$f30f2d 1 sse: cvtss2si
$f20f2d 1 sse: cvtsd2si
$f20f2a 1 sse: cvtsi2sd
$f30f2a 1 sse: cvtsi2ss

$0f5e sse2xa divps divpd
$0f7c sse2xc haddps haddpd
$0f7d sse2xc hsubps hsubpd
$0fd0 sse2xc addsubps addsubpd

\ todo: cvtss2si : rex prefix!
: cmp: ( opc #cmp "name" -- )
   create swap , c,
   does>  ( xmm1/mem xmm2 a-addr -- )
   dup @ swap cell+ c@ ib! -rex modxx0f ;
: cmps:  ( opc "name1" ... "name8" -- )  $8 0 do  dup i cmp: loop  drop ;
$0fc2 cmps: cmpeqps cmpltps cmpleps cmpunordps cmpneqps cmpnltps cmpnleps cmpordps
$660fc2 cmps: cmpeqpd cmpltpd cmplepd cmpunordpd cmpneqpd cmpnltpd cmpnlepd cmpordpd
$f30fc2 cmps: cmpeqss cmpltss cmpless cmpunordss cmpneqss cmpnltss cmpnless cmpordss
$f20fc2 cmps: cmpeqsd cmpltsd cmplesd cmpunordsd cmpneqsd cmpnltsd cmpnlesd cmpordsd 

\ 3Dnow!+MMX opcodes (Athlon)                          21apr00py

$F7 mod0F: MASKMOVQ             $E7 mod0F: MOVNTQ
$E0 mod0F: PAVGB                $E3 mod0F: PAVGW
$C5 mod0F: PEXTRW               $C4 mod0F: PINSRW
$EE mod0F: PMAXSW               $DE mod0F: PMAXUB
$EA mod0F: PMINSW               $DA mod0F: PMINUB
$D7 mod0F: PMOVMSKB             $E4 mod0F: PMULHUW
$F6 mod0F: PSADBW               $70 mod0F: PSHUFW

$0C 3Dnow: PI2FW                $1C 3Dnow: PF2IW
$8A 3Dnow: PFNACC               $8E 3Dnow: PFPNACC
$BB 3Dnow: PSWABD                  : SFENCE  .d $ae $f8 ib! finish0f ;     
: PREFETCHNTA  .d 000 $18 mod0F ;  : PREFETCHT0 .d 010 $18 mod0F ;
: PREFETCHT1   .d 020 $18 mod0F ;  : PREFETCHT2 .d  030 $18 mod0F ;

\ Assembler Conditionals                               22dec93py
: ~cond ( cond -- ~cond )  1 xor ;
: >offset ( start dest --- offbyte )  swap  2 + -  ?brange ;
: IF ( cond -- here )  [A] here [F] dup 2 + rot  ~cond  jmpIF ;
: THEN       dup [A] here >offset swap 1+ c! [F] ;
: AHEAD      [A] here [F] dup 2 + rel) jmp ;
: ELSE       [A] AHEAD swap THEN [F] ;
: BEGIN      [A] here ;         ' BEGIN Alias DO  [F]
: WHILE      [A] IF [F] swap ;
: UNTIL      ~cond  jmpIF ;
: AGAIN      rel) jmp ;
: REPEAT     [A] AGAIN  THEN [F] ;
: ?DO        [A] here [F] dup 2 + dup jcxz ;
: BUT        swap ;
: YET        dup ;
: makeflag   [A] ~cond AL swap setIF  1 # AX and  AX dec [F] ;


previous previous set-current decimal base !


CVS Admin

Powered by ViewCVS 1.0-dev
(Powered by ViewCVS)

ViewCVS and CVS Help