/lenasys/trunk

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/lenasys/trunk

« back to all changes in this revision

Viewing changes to Code Tokenizer and Parser/Parser_Test.js

  • Committer: Gustav Hartvigsson
  • Date: 2013-04-12 19:13:58 UTC
  • Revision ID: gustav.hartvigsson@gmail.com-20130412191358-lvnmll48cw7idkzk
added:
* COPYING - licensing information
* COPYRIGHT_HEADER - the header that should be in every file related to
  the project.
* README - Information about the project.
* lgpl-3.0 - The license that is used by this project.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// Global list of token objects. makeToken() appends every new token to it and
// newButton() reads it back to print the result; the visible code never empties it.
 
2
var tokens = [];  
 
3
/*
 * Token constructor (call with new).
 *
 * kind     - token category, stored as .kind
 * val      - token value, stored as .val
 * fromchar - start index of the token, stored as .from
 * tochar   - end index of the token, stored as .to
 * row      - row number of the token, stored as .row
 */
function token(kind, val, fromchar, tochar, row) {
    var self = this;

    // Category and value first, then the position information.
    self.kind = kind;
    self.val = val;

    self.from = fromchar;
    self.to = tochar;
    self.row = row;
}
 
13
/*
 * Creates a new token object using the token constructor and appends it to
 * the global tokens array.
 *
 * kind  - token category; tokenize() passes 'name', 'number', 'string' or 'operator'
 * val   - token value
 * from  - index of the first character of the token
 * to    - index just past the last character of the token
 * rowno - row number recorded for the token
 */
function makeToken(kind, val, from, to, rowno) {
    // Built inline so that no implicit global variable is created; an
    // assignment to an undeclared name also throws in strict mode.
    tokens.push(new token(kind, val, from, to, rowno));
}
 
21
/*
 * Reports a tokenizer error to the user in an alert box.
 *
 * str - description of the problem (used as a prefix for val)
 * val - the offending text
 * row - row number on which the problem was found
 */
function error(str, val, row) {
    var message = "Tokenizer Error: " + str + val;
    message += " at row " + row;
    alert(message);
}
 
27
/*
 * Tokenize function
 * Tokenizer partly based on ideas from the very clever tokenizer written by Douglas Crockford.
 *
 * Splits instring into tokens and hands each one to makeToken(); problems are
 * reported through error(). Nothing is returned.
 *
 * instring - the text to tokenize
 * inprefix - characters that may start a multi-character operator
 * insuffix - characters that may continue a multi-character operator
 *
 * Token kinds produced: 'name', 'number', 'string' and 'operator'. Comments
 * (both // and block style) and white space produce no tokens. A token's row is
 * the row count at the moment the token is completed, so a string that spans
 * several rows is reported on its last row.
 */
function tokenize(instring, inprefix, insuffix) {
    var text = instring == null ? '' : String(instring);
    var prefixChars = inprefix || '';
    var suffixChars = insuffix || '';
    // length of the string
    var length = text.length;
    // index of the current character
    var i = 0;
    // current row value
    var row = 1;
    // exactly four hexadecimal digits, as required after \u
    var hexQuad = /^[0-9a-fA-F]{4}$/;
    // index of the start of the token
    var from;
    // current character
    var c;
    // current numerical value
    var n;
    // current quote character
    var q;
    // current string value
    var str;
    // the four characters following \u
    var hex;
    // set once an error has been reported for the current number
    var malformed;
    // set when the closing quote of the current string has been found
    var closed;

    function isDigit(ch) {
        return ch >= '0' && ch <= '9';
    }

    function isLetter(ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    // True when the character at pos ends a row. A "\r\n" pair ends one row
    // only: the "\r" is ignored and the "\n" is counted.
    function endsRow(pos) {
        var ch = text.charAt(pos);
        return ch === '\n' || (ch === '\r' && text.charAt(pos + 1) !== '\n');
    }

    // Appends the run of digits starting at i to str.
    function takeDigits() {
        while (isDigit(text.charAt(i))) {
            str += text.charAt(i);
            i++;
        }
    }

    // Every branch leaves i on the first character it did not consume, so the
    // current character is simply re-read at the top of each round.
    while (i < length) {
        from = i;
        c = text.charAt(i);

        if (c <= ' ') {
            // White space; add a row if it is a row terminator.
            if (endsRow(i)) {
                row++;
            }
            i++;
        } else if (isLetter(c)) {
            // Names i.e. Text: a letter followed by letters, digits or '_'.
            str = '';
            do {
                str += c;
                i++;
                c = text.charAt(i);
            } while (isLetter(c) || isDigit(c) || c === '_');
            makeToken('name', str, from, i, row);
        } else if (isDigit(c)) {
            // Number token: digits, optional fraction, optional exponent.
            str = '';
            malformed = false;
            takeDigits();
            if (text.charAt(i) === '.') {
                str += '.';
                i++;
                takeDigits();
            }
            c = text.charAt(i);
            if (c === 'e' || c === 'E') {
                str += c;
                i++;
                c = text.charAt(i);
                if (c === '-' || c === '+') {
                    str += c;
                    i++;
                }
                if (isDigit(text.charAt(i))) {
                    takeDigits();
                } else {
                    error('Bad Exponent in Number: ', str, row);
                    malformed = true;
                }
            }
            // A lower-case letter glued to a number is swallowed into the bad
            // number so that it is not reported as the start of a name.
            c = text.charAt(i);
            if (c >= 'a' && c <= 'z') {
                str += c;
                i++;
                if (!malformed) {
                    error('Bad Number: ', str, row);
                    malformed = true;
                }
            }
            // Each bad number is reported exactly once and yields no token.
            if (!malformed) {
                n = +str;
                if (isFinite(n)) {
                    makeToken('number', n, from, i, row);
                } else {
                    error('Bad Number: ', str, row);
                }
            }
        } else if (c === '\'' || c === '"') {
            // String .. handles c style breaking codes
            str = '';
            q = c;
            closed = false;
            i++;
            while (i < length) {
                c = text.charAt(i);
                if (c === q) {
                    closed = true;
                    break;
                }
                if (c < ' ') {
                    if (c === '\n' || c === '\r') {
                        // Strings may span rows; keep the row count in step.
                        if (endsRow(i)) {
                            row++;
                        }
                    } else {
                        error('Unterminated String: ', str, row);
                    }
                }
                if (c === '\\') {
                    i++;
                    if (i >= length) {
                        // Input ends right after the backslash; reported below.
                        break;
                    }
                    c = text.charAt(i);
                    // Any escaped character not listed here stands for itself.
                    switch (c) {
                    case 'b':
                        c = '\b';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 't':
                        c = '\t';
                        break;
                    case 'u':
                        hex = text.substring(i + 1, i + 5);
                        if (hexQuad.test(hex)) {
                            c = String.fromCharCode(parseInt(hex, 16));
                            i += 4;
                        } else {
                            // Not four hex digits: report it and keep the 'u'.
                            error('Unterminated String: ', str, row);
                        }
                        break;
                    }
                }
                str += c;
                i++;
            }
            if (closed) {
                // Step over the closing quote.
                i++;
                makeToken('string', str, from, i, row);
            } else {
                // End of input reached before the closing quote.
                error('Unterminated String: ', str, row);
            }
        } else if (c === '/' && text.charAt(i + 1) === '/') {
            // Comment of // type. The row terminator is left in place so that
            // the white space branch counts the row exactly once.
            i += 2;
            c = text.charAt(i);
            while (c && c !== '\n' && c !== '\r') {
                i++;
                c = text.charAt(i);
            }
        } else if (c === '/' && text.charAt(i + 1) === '*') {
            // Block comment of /* type. Both opening characters are skipped
            // first so that "/*/" is not taken for a complete comment. An
            // unterminated block comment silently runs to the end of input.
            i += 2;
            while (i < length) {
                if (text.charAt(i) === '*' && text.charAt(i + 1) === '/') {
                    i += 2;
                    break;
                }
                if (endsRow(i)) {
                    row++;
                }
                i++;
            }
        } else if (prefixChars.indexOf(c) >= 0) {
            // Multi-character Operators: a prefix character followed by any
            // number of suffix characters.
            str = c;
            i++;
            while (i < length && suffixChars.indexOf(text.charAt(i)) >= 0) {
                str += text.charAt(i);
                i++;
            }
            makeToken('operator', str, from, i, row);
        } else {
            // Single-character Operators
            i++;
            makeToken('operator', c, from, i, row);
        }
    }
}
 
237
 
 
238
/*
 * Test driver: tokenizes a fixed sample text and appends one line per entry of
 * the global tokens array to the element with id 'infobox'.
 *
 * tokens is not emptied here, so entries that are already in the array when
 * this function is called are printed as well.
 */
function newButton() {
    // All variables are declared locally; assigning to undeclared names would
    // create globals and throws in strict mode.
    var sample = "foo // Lederhosen\nsin'foo'(200.0*r)+(-21.4/1.51e-6)+/*Plodder*/(Feeeb+400)*ln(3000);\ncos(200.1-atan2(41.0,51.0));\nHello+85-4*2.6-1.51e-6;";
    var printout = document.getElementById('infobox');
    var html = '';
    var idx;
    var tok;

    tokenize(sample, "<>+-&", "=>&:");

    // Collect kind, value, character range and row of each token object.
    for (idx = 0; idx < tokens.length; idx++) {
        tok = tokens[idx];
        html += tok.kind + " " + tok.val + " (" + tok.from + "-" + tok.to + ") at: " + tok.row + "<br>";
    }

    // Written in one go so the element's markup is rebuilt only once.
    if (html) {
        printout.innerHTML += html;
    }
}