--- gforth/utf-8.fs 2008/11/23 21:09:55 1.37 +++ gforth/utf-8.fs 2011/12/31 15:29:25 1.44 @@ -1,6 +1,6 @@ \ UTF-8 handling 12dec04py -\ Copyright (C) 2004,2005,2006,2007,2008 Free Software Foundation, Inc. +\ Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2011 Free Software Foundation, Inc. \ This file is part of Gforth. @@ -119,7 +119,8 @@ Defer check-xy ' noop IS check-xy dup $f8 u< if drop 4 exit endif dup $fc u< if drop 5 exit endif dup $fe u< if drop 6 exit endif - UTF-8-err throw ; + dup $ff u< if drop 7 exit endif + drop 8 ; : -u8trailing-garbage ( addr u1 -- addr u2 ) 2dup + dup u8<< ( addr u1 end1 end2 ) @@ -272,15 +273,17 @@ here wc-table - Constant #wc-table \ inefficient table walk: -: wcwidth ( xc -- n ) +: xc-width ( xc -- n ) wc-table #wc-table over + swap ?DO dup I 2@ within IF I 2 cells + @ UNLOOP EXIT THEN 3 cells +LOOP 1 ; +[ELSE] + ' wcwidth Alias xc-width [THEN] : u8width ( xcaddr u -- n ) 0 rot rot over + swap ?DO - I xc@+ swap >r wcwidth + + I xc@+ swap >r xc-width + r> I - +LOOP ; : set-encoding-utf-8 ( -- ) @@ -297,6 +300,9 @@ here wc-table - Constant #wc-table ['] +u8/string is +x/string [ [THEN] ] ['] u8@ is xc@ +[ [IFDEF] xc!+ ] + ['] u8!+ is xc!+ +[ [THEN] ] ['] u8!+? is xc!+? ['] u8@+ is xc@+ ['] u8len is xc-size @@ -325,6 +331,9 @@ environment-wordlist set-current \G ``ISO-LATIN-1'' or ``UTF-8'', with the exception of ``ASCII'', where \G we prefer the alias ``ASCII''. max-single-byte $80 = IF s" UTF-8" ELSE s" ISO-LATIN-1" THEN ; +: max-xchar ( -- xchar ) + max-single-byte $80 = IF $7FFFFFFF ELSE $FF THEN ; +' noop Alias X:xchar forth definitions :noname ( -- )