![]() ![]() | ![]() |
Added start-mark/end-mark for error display
1: \ UTF-8 handling 12dec04py 2: 3: \ Copyright (C) 2004 Free Software Foundation, Inc. 4: 5: \ This file is part of Gforth. 6: 7: \ Gforth is free software; you can redistribute it and/or 8: \ modify it under the terms of the GNU General Public License 9: \ as published by the Free Software Foundation; either version 2 10: \ of the License, or (at your option) any later version. 11: 12: \ This program is distributed in the hope that it will be useful, 13: \ but WITHOUT ANY WARRANTY; without even the implied warranty of 14: \ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15: \ GNU General Public License for more details. 16: 17: \ You should have received a copy of the GNU General Public License 18: \ along with this program; if not, write to the Free Software 19: \ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 20: 21: \ short: u8 means utf-8 encoded address 22: 23: s" malformed UTF-8 character" exception Constant UTF-8-err 24: 25: : u8len ( u8 -- n ) 26: dup $80 u< IF drop 1 EXIT THEN \ special case ASCII 27: $800 2 >r 28: BEGIN 2dup u>= WHILE 5 lshift r> 1+ >r REPEAT 29: 2drop r> ; 30: 31: : u8@+ ( u8addr -- u8addr' u ) 32: count dup $80 and 0= ?EXIT \ special case ASCII 33: $7F and $40 >r 34: BEGIN dup r@ and WHILE r@ xor 35: 6 lshift r> 5 lshift >r >r count 36: dup $C0 and $80 <> IF UTF-8-err throw THEN 37: $3F and r> or 38: REPEAT rdrop ; 39: 40: : u8!+ ( u u8addr -- u8addr' ) 41: over $80 < IF tuck c! 1+ EXIT THEN \ special case ASCII 42: >r 0 swap $3F 43: BEGIN 2dup u> WHILE 44: 2/ >r dup $3F and $80 or swap 6 rshift r> 45: REPEAT $7F xor 2* or r> 46: BEGIN over $80 u>= WHILE tuck c! 1+ REPEAT nip ; 47: 48: \ scan to next/previous character 49: 50: : u8>> ( u8addr -- u8addr' ) 51: BEGIN count $C0 and $80 <> UNTIL ; 52: : u8<< ( u8addr -- u8addr' ) 53: BEGIN 1- dup c@ $C0 and $80 <> UNTIL ; 54: 55: \ utf key and emit 56: 57: : u8key ( -- u ) 58: defers key dup $80 and 0= ?EXIT \ special case ASCII 59: $7F and $40 >r 60: BEGIN dup r@ and WHILE r@ xor 61: 6 lshift r> 5 lshift >r >r defers key 62: dup $C0 and $80 <> IF UTF-8-err throw THEN 63: $3F and r> or 64: REPEAT rdrop ; 65: 66: : u8emit ( u -- ) 67: dup $80 < IF defers emit EXIT THEN \ special case ASCII 68: 0 swap $3F 69: BEGIN 2dup u> WHILE 70: 2/ >r dup $3F and $80 or swap 6 rshift r> 71: REPEAT $7F xor 2* or 72: BEGIN dup $80 u>= WHILE defers emit REPEAT drop ; 73: 74: \ input editor 75: 76: [IFUNDEF] #esc 27 Constant #esc [THEN] 77: 78: : save-cursor ( -- ) #esc emit '7 emit ; 79: : restore-cursor ( -- ) #esc emit '8 emit ; 80: : .rest ( addr pos1 -- addr pos1 ) 81: restore-cursor 2dup type ; 82: : .all ( span addr pos1 -- span addr pos1 ) 83: restore-cursor >r 2dup swap type r> ; 84: 85: : <u8ins> ( max span addr pos1 u8char -- max span addr pos2 ) 86: >r 2over r@ u8len + u< IF rdrop bell EXIT THEN 87: >string over r@ u8len + swap move 2dup chars + r@ swap u8!+ drop 88: r> u8len >r rot r@ chars + -rot r> chars + ; 89: : (u8ins) ( max span addr pos1 u8char -- max span addr pos2 ) 90: <u8ins> .all .rest ; 91: : u8back ( max span addr pos1 -- max span addr pos2 f ) 92: dup IF over + u8<< over - 0 max .all .rest 93: ELSE bell THEN 0 ; 94: : u8forw ( max span addr pos1 -- max span addr pos2 f ) 95: 2 pick over <> IF over + u8@+ u8emit over - ELSE bell THEN 0 ; 96: : (u8del) ( max span addr pos1 -- max span addr pos2 ) 97: over + dup u8<< tuck - >r over - 98: >string over r@ + -rot move 99: rot r> - -rot ; 100: : ?u8del ( max span addr pos1 -- max span addr pos2 0 ) 101: dup IF (u8del) .all 2 spaces .rest THEN 0 ; 102: : <u8del> ( max span addr pos1 -- max span addr pos2 0 ) 103: 2 pick over <> 104: IF u8forw drop (u8del) .all 2 spaces .rest 105: ELSE bell THEN 0 ; 106: : u8eof 2 pick over or 0= IF bye ELSE <u8del> THEN ; 107: 108: : u8first-pos ( max span addr pos1 -- max span addr 0 0 ) 109: drop 0 .all .rest 0 ; 110: : u8end-pos ( max span addr pos1 -- max span addr span 0 ) 111: drop over .all 0 ; 112: 113: 114: : u8clear-line ( max span addr pos1 -- max addr ) 115: drop restore-cursor swap spaces restore-cursor ; 116: : u8clear-tib ( max span addr pos -- max 0 addr 0 false ) 117: u8clear-line 0 tuck dup ; 118: 119: : (u8enter) ( max span addr pos1 -- max span addr pos2 true ) 120: >r end^ 2@ hist-setpos 121: 2dup swap history write-line drop ( throw ) \ don't worry about errors 122: hist-pos 2dup backward^ 2! end^ 2! 123: r> .all space true ; 124: 125: : u8kill-expand ( max span addr pos1 -- max span addr pos2 ) 126: prefix-found cell+ @ ?dup IF >r 127: r@ - >string over r@ + -rot move 128: rot r@ - -rot .all r> spaces .rest THEN ; 129: 130: : insert ( string length buffer size -- ) 131: rot over min >r r@ - ( left over ) 132: over dup r@ + rot move r> move ; 133: 134: : u8tab-expand ( max span addr pos1 -- max span addr pos2 0 ) 135: key? IF #tab (u8ins) 0 EXIT THEN 136: u8kill-expand 2dup extract-word dup 0= IF nip EXIT THEN 137: search-prefix tib-full? 138: IF 7 emit 2drop prefix-off 139: ELSE dup >r 140: 2>r >string r@ + 2r> 2swap insert 141: r@ + rot r> + -rot 142: THEN 143: prefix-found @ IF bl (u8ins) ELSE .all .rest THEN 0 ; 144: 145: : utf-8-io ( -- ) 146: ['] u8forw ctrl F bindkey 147: ['] u8back ctrl B bindkey 148: ['] ?u8del ctrl H bindkey 149: ['] u8eof ctrl D bindkey 150: ['] <u8del> ctrl X bindkey 151: ['] u8clear-tib ctrl K bindkey 152: ['] u8first-pos ctrl A bindkey 153: ['] u8end-pos ctrl E bindkey 154: ['] (u8enter) #lf bindkey 155: ['] (u8enter) #cr bindkey 156: ['] u8tab-expand #tab bindkey 157: ['] (u8ins) IS insert-char 158: ['] kill-prefix IS everychar 159: ['] save-cursor IS everyline 160: ['] u8key IS key 161: ['] u8emit IS emit ; 162: