1
// Token class and storage definition
2
// Constructor for token objects; maketoken() invokes it as
// new token(kind,val,from,to,rowno).
// NOTE(review): the constructor body and its closing brace are not visible in
// this listing. The printing code reads .kind, .val, .from, .to and .row from
// stored tokens, so the body presumably assigns those fields -- confirm.
function token (kind,val,fromchar,tochar,row) {
10
// NOTE(review): presumably declared at file scope once the constructor has
// closed (maketoken() pushes onto it from outside) -- confirm.
var tokens = []; // Array to hold the tokens.
12
//----------------------------------------------------------
13
// Store token in tokens array
14
// Creates a new token object using the constructor
15
//----------------------------------------------------------
17
// Builds a token object from the given fields and appends it to the shared
// tokens array.
//   kind  - token category, e.g. 'name', 'number', 'string' or 'operator'
//   val   - token value (text, or the numeric value for number tokens)
//   from  - index of the start of the token in the input
//   to    - index of the current character when the token was emitted
//   rowno - row on which the token was found
// Returns nothing; the only effect is the push onto tokens.
function maketoken(kind,val,from,to,rowno)
{
	// Keep newtoken local: an undeclared assignment would create an implicit
	// global and throws a ReferenceError in strict mode / ES modules.
	var newtoken=new token(kind,val,from,to,rowno);
	tokens.push(newtoken);
}
23
//----------------------------------------------------------
24
// Writes error from tokenizer
25
//----------------------------------------------------------
27
// Reports a tokenizer problem to the user in an alert dialog.
//   str - message prefix, e.g. 'Bad Number: '
//   val - offending text, appended directly after str
//   row - row number included in the message
// The visible code neither throws nor returns a value, so the caller carries
// on after the dialog is dismissed.
// NOTE(review): the braces around the body are not visible in this listing.
function error(str,val,row)
29
alert("Tokenizer Error: "+str+val+" at row "+row);
32
//----------------------------------------------------------
34
// Tokenizer partly based on ideas from the very clever tokenizer written by Douglas Crockford
35
// The tokenizer is passed the string to scan, a string of operator prefix characters, and a string of operator suffix characters
36
//----------------------------------------------------------
38
// tokenize: scans instring one character at a time (charAt) and records each
// token it recognises through maketoken().
//   instring - the text to scan
//   inprefix - characters that may start a multi-character operator
//   insuffix - characters that may continue a multi-character operator
// Token kinds emitted in the visible code: 'name', 'number', 'string' and
// 'operator'. Problems are reported through error(). No return statement is
// visible; results are delivered through maketoken().
// NOTE(review): this listing is missing lines (loop headers, closing braces,
// the statements that advance i and build str/n). The comments below describe
// only what the visible lines show -- confirm against the complete file.
function tokenize(instring,inprefix,insuffix){
40
var from; // index of the start of the token.
41
var i = 0; // index of the current character.
42
var length=instring.length; // length of the string
44
var c; // current character.
45
var n; // current numerical value
46
var q; // current quote character
47
var str; // current string value.
48
var row=1; // current row value
51
// Load the character at the current index; charAt yields '' past the end.
c = instring.charAt(i);
56
// Anything that compares <= ' ' counts as white space. The empty string also
// compares <= ' ', which is why '' is tested on the next line.
if (c <= ' '){ // White space
57
// NOTE(review): '\n' and '\r' each bump row, so a CR+LF pair would count as
// two rows if both characters pass through here -- confirm.
if((c=='\n')||(c=='\r')||(c == '')) row++; // Add row if this white space is a row terminator
59
c = instring.charAt(i);
60
// A name starts with an ASCII letter.
}else if ((c >='a'&&c<='z')||(c>='A'&&c<='Z')) { // Names i.e. Text
64
c = instring.charAt(i);
65
// Later characters of a name may be letters, digits or an underscore.
if ((c >='a'&&c<='z')||(c>='A'&&c<='Z')||(c>='0'&&c<='9')||c=='_'){
72
maketoken('name',str,from,i,row);
73
// A number starts with an ASCII digit.
} else if (c >= '0' && c <= '9') { // Number token
77
c = instring.charAt(i);
78
// Leave the (not visible) enclosing loop at the first non-digit.
if (c < '0' || c > '9') break;
87
if (c < '0' || c > '9') break;
101
// An exponent must be followed by at least one digit.
if (c < '0' || c > '9') error('Bad Exponent in Number: ',str,row);
105
c=instring.charAt(i);
106
// Tail of a do/while that keeps reading while c is a digit; the opening
// `do` is not visible in this listing.
}while(c>='0'&&c<='9');
111
// NOTE(review): the conditions guarding the two 'Bad Number' reports are not
// visible here.
error('Bad Number: ',str,row);
115
// Number tokens carry the numeric value n rather than the text str.
maketoken('number',n,from,i,row);
117
error('Bad Number: ',str,row);
120
// A string starts with either a single or a double quote.
} else if(c=='\''||c=='"'){ // String .. handles c style breaking codes
125
c=instring.charAt(i);
128
if((c=='\n')||(c=='\r')||(c == '')) row++; // Count a row when a line terminator (or '') is met inside the string
129
error('Unterminated String: ',str,row);
137
error('Unterminated String: ',str,row);
139
c=instring.charAt(i);
141
// Map the escape letters b, f, n, r and t to their control characters.
if(c=='b'){ c='\b'; break; }
142
if(c=='f'){ c='\f'; break; }
143
if(c=='n'){ c='\n'; break; }
144
if(c=='r'){ c='\r'; break; }
145
if(c=='t'){ c='\t'; break; }
148
// NOTE(review): the 'Unterminated String' message is also used on the escape
// handling path below; the guarding conditions are not visible -- confirm the
// wording is intended.
error('Unterminated String: ',str,row);
150
// Parses the four characters after i as a hexadecimal character code.
// NOTE(review): tokenize is invoked as a plain function, so `this` is not the
// input string and this.substr is presumably a leftover from a String-method
// version of the tokenizer; likely should be instring.substr -- confirm.
c = parseInt(this.substr(i + 1, 4), 16);
151
if (!isFinite(c) || c < 0) {
152
error('Unterminated String: ',str,row);
154
// Turn the parsed code back into a one-character string.
c = String.fromCharCode(c);
163
maketoken('string',str,from,i,row);
164
c=instring.charAt(i);
166
} else if (c=='/'&&instring.charAt(i+1)=='/'){ // Comment of // type ... does not cover block comments
169
c=instring.charAt(i);
170
// A line comment ends at a line terminator or at the end of the input.
if (c=='\n'||c=='\r'||c=='') {
176
} else if (c=='/'&&instring.charAt(i+1)=='*'){ // Block comment of /* type
179
c=instring.charAt(i);
180
// A block comment ends at '*' followed by '/', or when i reaches the end of
// the input.
if ((c=='*'&&instring.charAt(i+1)=='/')||(i==length)) {
182
c=instring.charAt(i);
185
// Line terminators inside a block comment are detected here, presumably to
// keep row in step (the body of this branch is not visible) -- confirm.
if (c=='\n'||c=='\r'||c=='') {
190
// Operators whose first character appears in inprefix may span several
// characters.
}else if(inprefix.indexOf(c) >= 0) { // Multi-character Operators
194
c=instring.charAt(i);
195
// Stop extending the operator at the end of the input or at the first
// character that is not listed in insuffix.
if (i >= length || insuffix.indexOf(c) < 0) {
201
maketoken('operator',str,from,i,row);
202
// Every remaining character becomes a one-character operator token.
} else { // Single-character Operators
204
maketoken('operator',c,from,i,row);
205
c = instring.charAt(i);
216
// Demo driver: tokenizes a fixed sample string and lists the resulting tokens
// in the element with id 'infobox'.
// NOTE(review): oplist, string_tokenize, printout and i are assigned without
// var in the visible code; unless they are declared elsewhere they become
// implicit globals -- confirm. oplist is not read again in the visible lines.
oplist=document.getElementById('infobox');
218
// Sample input covering names, numbers with exponents, a quoted string, both
// comment styles and operators, spread over several rows.
string_tokenize="foo // Lederhosen\nsin'foo'(200.0*r)+(-21.4/1.51e-6)+/*Plodder*/(Feeeb+400)*ln(3000);\ncos(200.1-atan2(41.0,51.0));\nHello+85-4*2.6-1.51e-6;";
220
// Second argument: operator prefix characters; third: operator suffix characters.
tokenize(string_tokenize,"<>+-&","=>&:");
223
// Print one line per token: its kind, value, (from-to) character range and row.
224
printout=document.getElementById('infobox');
225
// NOTE(review): the closing brace of this loop is not visible in this listing.
for(i=0;i<tokens.length;i++){
226
// NOTE(review): token values are appended as HTML without escaping; harmless
// for the fixed sample above, but worth revisiting if the input ever comes
// from the user.
printout.innerHTML+=tokens[i].kind+" "+tokens[i].val+" ("+tokens[i].from+"-"+tokens[i].to+") at: "+tokens[i].row+"<br>";