Would some of the experts of this forum (or any others) please take a look
at my attempt to code a javascript tokenizer.
My goal is to seperate a string into either
1. defined keywords (per JSON pseudo hash attempt)
2. strings (starting with letter and optionally followed by more letters or numbers)
3. numbers (integers or decimals, with or without leading +- character)
4. groups of characters not in 1-3 above, like == or != or <= or >=, etc.
My attempt tries to use parts of JSON associative arrays and regular expression evaluations.
I’m close, but am having problems with logic(?)
I’m having trouble with #3 with ‘+’ or ‘-‘ characters
and #4 with groupings of non-keyword elements
The HTML is just a shell to enter string and convert to comma seperated array. My overall goal is to incorporate the code into a RPN evaluator from string entries of algebric form (infix to postfix). I have other functions, but they don’t work well without this part working better.
[code=php]
<html>
<head>
<title>Tokenizer Test</title>
<script type=”text/javascript”>
Tokens = {‘+’:3, ‘-‘:3, ‘*’:5, ‘/’:5, ‘%’:5, ‘^’:6, ‘=’:6,
‘pow’:7, ‘log’:9, ‘exp’:9, ‘sqr’:9, ‘sqrt’:9, ‘atan2’:7,
‘sin’:9, ‘cos’:9, ‘tan’:9, ‘asin’:9, ‘acos’:9, ‘atan’:9,
‘abs’:9, ‘ceil’:9, ‘floor’:9, ’round’:9, ‘random’:9,
‘pi’:9, ‘ln’:9, ‘ln2’:9, ‘ln10’:9, ‘log2e’:9, ‘log10e’:9,
‘(‘:2, ‘)’:2, ‘{‘:2, ‘}’:2, ‘[‘:2, ‘]’:2, ”:1};
var ATest = {
// check parameter against valid keywords (Tokens)
// remove all white space characters with ‘replace’ and force lower case comparisons
// if found in Token hash, return precedence value
// if not Token, check for valid number like: [+-][0]0..9[.][0..9] AS -0.25 OR 34 OR -1.2345
// if valid number, return 0
// otherwise, return -1 as variable like: letter[letter|number] AS tmp OR tmp1 OR t OR t1
precd : function(who) {
var str = who;
str = str.replace(/s/g,”);
str = Tokens[str.toLowerCase()]; // found token (maybe)
if (str == undefined) {
if (/^[+-]?(.d+|d+(.d*)?)$/.test(who)) { // if true, found number
str = 0; // alert(‘Number’);
} else { // found variable
str = -1; // alert(‘Variable’);
}
}
return str;
},
// split string into seperate elements of strArr
// check each element to see if is KEYWORD or part of a NUMBER or VARIABLE string
strToTokens : function(str) {
var strArr = str.split(“”); var tempStr = new String(“”);
var tokens = new Array(); var tokens_index = 0;
var level;
for(var i=0; i<strArr.length; i++) {
level = this.precd(strArr[i]); // alert(level+’:’+strArr[i]+’:’+tempStr+’:’);
if (level <= 0) {
tempStr += strArr[i];
} else {
if(tempStr != “”) { tokens[tokens_index] = tempStr; tokens_index++; }
tempStr = “”;
if (level > 1) { tokens[tokens_index] = strArr[i]; tokens_index++; }
}
}
// if(i==(strArr.length-1)) { // seems to make no difference
if(tempStr != “”) { tokens[tokens_index] = tempStr; }
// }
// }
return(tokens);
}
}
</script>
</head>
<body>
<input type=”text” id=”src” value=””>
<button onClick=”document.getElementById(‘src’).value=””>Reset</button>
<br />
<input type=”text” id=”rslt” value=””>
<button
onClick=”document.getElementById(‘rslt’).value=ATest.strToTokens(document.getElementById(‘src’).value)”>
Convert</button>
<br />
<input type=”radio” name=”tst” value=’abc=123+456|’
onClick=”document.getElementById(‘src’).value=this.value”>
<input type=”radio” name=”tst” value=’abc = 123 + 456 | ‘
onClick=”document.getElementById(‘src’).value=this.value”>
<input type=”radio” name=”tst” value=’abc = 123 + 456| ‘
onClick=”document.getElementById(‘src’).value=this.value”>
<br />Test values
</body>
</html>
Any help is appreciated.
I’ll try to expand description of my attemp if script doesn’t make sense.
I’ll also consider any other ideas toward solution.