| \ UTF-8 handling 12dec04py |
\ UTF-8 handling 12dec04py |
| |
|
| \ Copyright (C) 2004 Free Software Foundation, Inc. |
\ Copyright (C) 2004,2005 Free Software Foundation, Inc. |
| |
|
| \ This file is part of Gforth. |
\ This file is part of Gforth. |
| |
|
| : u8len ( u8 -- n ) |
: u8len ( u8 -- n ) |
| dup max-single-byte u< IF drop 1 EXIT THEN \ special case ASCII |
dup max-single-byte u< IF drop 1 EXIT THEN \ special case ASCII |
| $800 2 >r |
$800 2 >r |
| BEGIN 2dup u>= WHILE 5 lshift r> 1+ >r REPEAT |
BEGIN 2dup u>= WHILE 5 lshift r> 1+ >r dup 0= UNTIL THEN |
| 2drop r> ; |
2drop r> ; |
| |
|
| : u8@+ ( u8addr -- u8addr' u ) |
: u8@+ ( u8addr -- u8addr' u ) |
| count dup max-single-byte u< ?EXIT \ special case ASCII |
count dup max-single-byte u< ?EXIT \ special case ASCII |
| |
dup $C2 u< IF UTF-8-err throw THEN \ malformed character |
| $7F and $40 >r |
$7F and $40 >r |
| BEGIN dup r@ and WHILE r@ xor |
BEGIN dup r@ and WHILE r@ xor |
| 6 lshift r> 5 lshift >r >r count |
6 lshift r> 5 lshift >r >r count |
| |
|
| : u8key ( -- u ) |
: u8key ( -- u ) |
| defers key dup max-single-byte u< ?EXIT \ special case ASCII |
defers key dup max-single-byte u< ?EXIT \ special case ASCII |
| |
dup $C2 u< IF UTF-8-err throw THEN \ malformed character |
| $7F and $40 >r |
$7F and $40 >r |
| BEGIN dup r@ and WHILE r@ xor |
BEGIN dup r@ and WHILE r@ xor |
| 6 lshift r> 5 lshift >r >r defers key |
6 lshift r> 5 lshift >r >r defers key |
| nip nip over - |
nip nip over - |
| then ; |
then ; |
| |
|
| |
[IFUNDEF] wcwidth |
| |
: wcwidth abort ; |
| |
[THEN] |
| |
|
| |
: u8width ( xcaddr u -- n ) |
| |
0 rot rot over + swap ?DO |
| |
I xc@+ swap >r wcwidth + |
| |
r> I - +LOOP ; |
| |
|
| : set-encoding-utf-8 ( -- ) |
: set-encoding-utf-8 ( -- ) |
| ['] u8emit is xemit |
['] u8emit is xemit |
| ['] u8key is xkey |
['] u8key is xkey |
| ['] u8!+? is xc!+? |
['] u8!+? is xc!+? |
| ['] u8@+ is xc@+ |
['] u8@+ is xc@+ |
| ['] u8len is xc-size |
['] u8len is xc-size |
| |
[ [IFDEF] x-width ] |
| |
['] u8width is x-width |
| |
[ [THEN] ] |
| ['] -u8trailing-garbage is -trailing-garbage |
['] -u8trailing-garbage is -trailing-garbage |
| ; |
; |
| |
|
| |
: utf-8-cold ( -- ) |
| |
s" LC_ALL" getenv 2dup d0= IF 2drop |
| |
s" LC_CTYPE" getenv 2dup d0= IF 2drop |
| |
s" LANG" getenv 2dup d0= IF 2drop |
| |
s" C" THEN THEN THEN |
| |
s" UTF-8" search nip nip |
| |
IF set-encoding-utf-8 ELSE set-encoding-fixed-width THEN ; |
| |
|
| |
' utf-8-cold INIT8 chained |
| |
|
| |
utf-8-cold |