version 1.15, 2005/01/12 21:21:53
|
version 1.22, 2006/02/20 08:22:28
|
Line 1
|
Line 1
|
\ UTF-8 handling 12dec04py |
\ UTF-8 handling 12dec04py |
|
|
\ Copyright (C) 2004 Free Software Foundation, Inc. |
\ Copyright (C) 2004,2005 Free Software Foundation, Inc. |
|
|
\ This file is part of Gforth. |
\ This file is part of Gforth. |
|
|
Line 27 $80 Value max-single-byte
|
Line 27 $80 Value max-single-byte
|
: u8len ( u8 -- n ) |
: u8len ( u8 -- n ) |
dup max-single-byte u< IF drop 1 EXIT THEN \ special case ASCII |
dup max-single-byte u< IF drop 1 EXIT THEN \ special case ASCII |
$800 2 >r |
$800 2 >r |
BEGIN 2dup u>= WHILE 5 lshift r> 1+ >r REPEAT |
BEGIN 2dup u>= WHILE 5 lshift r> 1+ >r dup 0= UNTIL THEN |
2drop r> ; |
2drop r> ; |
|
|
: u8@+ ( u8addr -- u8addr' u ) |
: u8@+ ( u8addr -- u8addr' u ) |
count dup max-single-byte u< ?EXIT \ special case ASCII |
count dup max-single-byte u< ?EXIT \ special case ASCII |
|
dup $C2 u< IF UTF-8-err throw THEN \ malformed character |
$7F and $40 >r |
$7F and $40 >r |
BEGIN dup r@ and WHILE r@ xor |
BEGIN dup r@ and WHILE r@ xor |
6 lshift r> 5 lshift >r >r count |
6 lshift r> 5 lshift >r >r count |
Line 72 $80 Value max-single-byte
|
Line 73 $80 Value max-single-byte
|
|
|
: u8key ( -- u ) |
: u8key ( -- u ) |
defers key dup max-single-byte u< ?EXIT \ special case ASCII |
defers key dup max-single-byte u< ?EXIT \ special case ASCII |
|
dup $C2 u< IF UTF-8-err throw THEN \ malformed character |
$7F and $40 >r |
$7F and $40 >r |
BEGIN dup r@ and WHILE r@ xor |
BEGIN dup r@ and WHILE r@ xor |
6 lshift r> 5 lshift >r >r defers key |
6 lshift r> 5 lshift >r >r defers key |
Line 126 $80 Value max-single-byte
|
Line 128 $80 Value max-single-byte
|
nip nip over - |
nip nip over - |
then ; |
then ; |
|
|
|
[IFUNDEF] wcwidth |
|
: wcwidth abort ; |
|
[THEN] |
|
|
|
: u8width ( xcaddr u -- n ) |
|
0 rot rot over + swap ?DO |
|
I xc@+ swap >r wcwidth + |
|
r> I - +LOOP ; |
|
|
: set-encoding-utf-8 ( -- ) |
: set-encoding-utf-8 ( -- ) |
['] u8emit is xemit |
['] u8emit is xemit |
['] u8key is xkey |
['] u8key is xkey |
Line 137 $80 Value max-single-byte
|
Line 148 $80 Value max-single-byte
|
['] u8!+? is xc!+? |
['] u8!+? is xc!+? |
['] u8@+ is xc@+ |
['] u8@+ is xc@+ |
['] u8len is xc-size |
['] u8len is xc-size |
|
[ [IFDEF] x-width ] |
|
['] u8width is x-width |
|
[ [THEN] ] |
['] -u8trailing-garbage is -trailing-garbage |
['] -u8trailing-garbage is -trailing-garbage |
; |
; |
|
|
: utf-8-cold ( -- ) |
: utf-8-cold ( -- ) |
s" LANG" getenv s" .UTF-8" search nip nip |
s" LC_ALL" getenv 2dup d0= IF 2drop |
|
s" LC_CTYPE" getenv 2dup d0= IF 2drop |
|
s" LANG" getenv 2dup d0= IF 2drop |
|
s" C" THEN THEN THEN |
|
s" UTF-8" search nip nip |
IF set-encoding-utf-8 ELSE set-encoding-fixed-width THEN ; |
IF set-encoding-utf-8 ELSE set-encoding-fixed-width THEN ; |
|
|
' utf-8-cold INIT8 chained |
' utf-8-cold INIT8 chained |