--- gforth/utf-8.fs 2007/07/14 19:57:16 1.28 +++ gforth/utf-8.fs 2007/12/31 18:40:24 1.34 @@ -1,12 +1,12 @@ \ UTF-8 handling 12dec04py -\ Copyright (C) 2004,2005,2006 Free Software Foundation, Inc. +\ Copyright (C) 2004,2005,2006,2007 Free Software Foundation, Inc. \ This file is part of Gforth. \ Gforth is free software; you can redistribute it and/or \ modify it under the terms of the GNU General Public License -\ as published by the Free Software Foundation; either version 2 +\ as published by the Free Software Foundation, either version 3 \ of the License, or (at your option) any later version. \ This program is distributed in the hope that it will be useful, @@ -15,8 +15,7 @@ \ GNU General Public License for more details. \ You should have received a copy of the GNU General Public License -\ along with this program; if not, write to the Free Software -\ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. +\ along with this program. If not, see http://www.gnu.org/licenses/. \ short: u8 means utf-8 encoded address @@ -94,15 +93,10 @@ Defer check-xy ' noop IS check-xy \ utf-8 stuff for xchars -: u8string+ ( xcaddr u -- xcaddr u' ) - over + u8>> over - ; -: u8string- ( xcaddr u -- xcaddr u' ) - over + u8<< over - ; - -: +u8string ( xc-addr1 u1 -- xc-addr2 u2 ) +: +u8/string ( xc-addr1 u1 -- xc-addr2 u2 ) over dup u8>> swap - /string ; -: -u8string ( xc-addr1 u1 -- xc-addr2 u2 ) - over dup u8<< swap - /string ; +: u8\string- ( xcaddr u -- xcaddr u' ) + over + u8<< over - ; : u8@ ( c-addr -- u ) u8@+ nip ; @@ -295,10 +289,12 @@ here wc-table - Constant #wc-table ['] u8>> is xchar+ ['] u8<< is xchar- [ [IFDEF] xstring+ ] - ['] u8string+ is xstring+ - ['] u8string- is xstring- - ['] +u8string is +xstring - ['] -u8string is -xstring + ['] u8\string- is xstring- + ['] +u8/string is +xstring +[ [THEN] ] +[ [IFDEF] +x/string ] + ['] u8\string- is x\string- + ['] +u8/string is +x/string [ [THEN] ] ['] u8@ is xc@ ['] u8!+? is xc!+? 
@@ -321,6 +317,16 @@ here wc-table - Constant #wc-table s" UTF-8" search nip nip IF set-encoding-utf-8 ELSE set-encoding-fixed-width THEN ; +environment-wordlist set-current +: xchar-encoding ( -- addr u ) \ xchar-ext + \G Returns a printable ASCII string that represents the encoding, + \G and uses the preferred MIME name (if any) or the name in + \G @url{http://www.iana.org/assignments/character-sets} like + \G ``ISO-LATIN-1'' or ``UTF-8'', with the exception of ``ASCII'', where + \G we prefer the alias ``ASCII''. + max-single-byte $80 = IF s" UTF-8" ELSE s" ISO-LATIN-1" THEN ; +forth definitions + :noname ( -- ) defers 'cold utf-8-cold