Diff for /gforth/utf-8.fs between versions 1.27 and 1.32

version 1.27, 2007/06/30 23:00:14 version 1.32, 2007/10/17 16:05:22
Line 94  Defer check-xy  ' noop IS check-xy Line 94  Defer check-xy  ' noop IS check-xy
   
 \ utf-8 stuff for xchars  \ utf-8 stuff for xchars
   
 : u8string+ ( xcaddr u -- xcaddr u' )  : +u8/string ( xc-addr1 u1 -- xc-addr2 u2 )
     over + u8>> over - ;  
 : u8string- ( xcaddr u -- xcaddr u' )  
     over + u8<< over - ;  
   
 : +u8string ( xc-addr1 u1 -- xc-addr2 u2 )  
     over dup u8>> swap - /string ;      over dup u8>> swap - /string ;
 : -u8string ( xc-addr1 u1 -- xc-addr2 u2 )  : u8\string- ( xcaddr u -- xcaddr u' )
     over dup u8<< swap - /string ;      over + u8<< over - ;
   
 : u8@ ( c-addr -- u )  : u8@ ( c-addr -- u )
     u8@+ nip ;      u8@+ nip ;
Line 115  Defer check-xy  ' noop IS check-xy Line 110  Defer check-xy  ' noop IS check-xy
         >r u8!+ r> r> swap - true          >r u8!+ r> r> swap - true
     then ;      then ;
   
 : u8addrlen ( u8-addr -- u )  : u8addrlen ( u8-addr u -- u )  drop
     \ length of UTF-8 char starting at u8-addr (accesses only u8-addr)      \ length of UTF-8 char starting at u8-addr (accesses only u8-addr)
     c@      c@
     dup $80 u< if drop 1 exit endif      dup $80 u< if drop 1 exit endif
Line 129  Defer check-xy  ' noop IS check-xy Line 124  Defer check-xy  ' noop IS check-xy
   
 : -u8trailing-garbage ( addr u1 -- addr u2 )  : -u8trailing-garbage ( addr u1 -- addr u2 )
     2dup + dup u8<< ( addr u1 end1 end2 )      2dup + dup u8<< ( addr u1 end1 end2 )
     2dup dup u8addrlen + = if \ last character ok      2dup dup over over - u8addrlen + = if \ last character ok
         2drop          2drop
     else      else
         nip nip over -          nip nip over -
Line 295  here wc-table - Constant #wc-table Line 290  here wc-table - Constant #wc-table
     ['] u8>> is xchar+      ['] u8>> is xchar+
     ['] u8<< is xchar-      ['] u8<< is xchar-
 [ [IFDEF] xstring+ ]  [ [IFDEF] xstring+ ]
     ['] u8string+ is xstring+      ['] u8\string- is xstring-
     ['] u8string- is xstring-      ['] +u8/string is +xstring
     ['] +u8string is +xstring  [ [THEN] ]
     ['] -u8string is -xstring  [ [IFDEF] +x/string ]
       ['] u8\string- is x\string-
       ['] +u8/string is +x/string
 [ [THEN] ]  [ [THEN] ]
     ['] u8@ is xc@      ['] u8@ is xc@
     ['] u8!+? is xc!+?      ['] u8!+? is xc!+?
Line 307  here wc-table - Constant #wc-table Line 304  here wc-table - Constant #wc-table
 [ [IFDEF] x-width ]  [ [IFDEF] x-width ]
     ['] u8width is x-width      ['] u8width is x-width
 [ [THEN] ]  [ [THEN] ]
   [ [IFDEF] x-size ]
       ['] u8addrlen is x-size
   [ [THEN] ]
     ['] -u8trailing-garbage is -trailing-garbage      ['] -u8trailing-garbage is -trailing-garbage
 ;  ;
   
Line 318  here wc-table - Constant #wc-table Line 318  here wc-table - Constant #wc-table
     s" UTF-8" search nip nip      s" UTF-8" search nip nip
     IF  set-encoding-utf-8  ELSE  set-encoding-fixed-width  THEN ;      IF  set-encoding-utf-8  ELSE  set-encoding-fixed-width  THEN ;
   
   environment-wordlist set-current
  : xchar-encoding ( -- addr u ) \ xchar-ext
      \G Returns a printable ASCII string that represents the encoding,
      \G and uses the preferred MIME name (if any) or the name in
      \G @url{http://www.iana.org/assignments/character-sets} like
      \G ``ISO-LATIN-1'' or ``UTF-8'', with the exception of ``ASCII'', where
      \G we prefer the alias ``ASCII''.
      \ Only two results are produced here: "UTF-8" when max-single-byte
      \ equals $80, and "ISO-LATIN-1" otherwise.
      max-single-byte $80 = IF s" UTF-8" ELSE s" ISO-LATIN-1" THEN ;
   forth definitions
   
 :noname ( -- )  :noname ( -- )
     defers 'cold      defers 'cold
     utf-8-cold      utf-8-cold

Removed from v.1.27  
changed lines
  Added in v.1.32


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>