/*
 * Copyright (c) 2006, Adam Dunkels
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */


//------------------------------------------------------------
// Changelog
//------------------------------------------------------------
//  [V:0.1 / 04.09.2013] :
// Author   : Adam Dunkels
// Web      : http://dunkels.com/adam/ubasic/
//
// Original version by Adam Dunkels, unchanged
//------------------------------------------------------------
//  [V:Minimal / 24.07.2015] :
// Author   : UB
// EMail    : mc-4u(@)t-online.de
// Web      : www.mikrocontroller-4u.de
// Port for STM32F4
//------------------------------------------------------------


#include "tokenizer.h"
#include <string.h>
#include <stdlib.h>


// Scan cursor shared by the whole tokenizer: `ptr` is the first character
// of the current token, `nextptr` the first character after it (written by
// get_next_token(), consumed by tokenizer_next()).
// Both have external linkage.
char *ptr, *nextptr;


//------------------------------------------------------------
// Scan limits used by get_next_token(): a number whose end is not found
// before the index reaches these values is reported as TOKENIZER_ERROR.
#define MAX_NUMLEN 6  // decimal: at most 5 digits, maxvalue = '99999'
#define MAX_HEXLEN 7  // hex: "0x" plus at most 4 digits, maxvalue = '0xFFFF'


//------------------------------------------------------------
// keyword struct: one entry of the keyword lookup table
//------------------------------------------------------------
struct keyword_token {
  char *keyword;     // keyword spelling (upper case); NULL in the table terminator
  int token;         // TOKENIZER_xxx value reported for this keyword
};


// Token most recently published to callers (read via tokenizer_token()).
static int current_token = TOKENIZER_ERROR;
// Expression class of current_token as set by tokenizer_set_expression():
// 0 = statement, 1 = integer value, 2 = string value.
static int current_expr  =0;

//------------------------------------------------------------
// added by UB for pre-parser
//------------------------------------------------------------
// Last token produced by get_next_token(); a decimal number that directly
// follows TOKENIZER_CR is classified as a line number.
static int akt_token=TOKENIZER_CR;
// 0 = keywords are still plain text and are matched by name,
// 1 = keywords are only recognised through their 0x01 marker.
static int parser_mode=0;
// Pre-parse passes and helpers.
// NOTE(review): check_preparse() has no definition in the visible part of
// this file - confirm it is still needed.
int check_preparse(char *program);
int exchange_keywords(char *program);
int search_goto_gosub_for(char *program);
int search_linenum(char *program);
int check_linenum(int linenum,int mode);
// tk_failed is only ever reset to 0 in this file; tk_akt_linenum holds the
// last non-zero line number seen by the pre-parse passes.
static int tk_failed, tk_akt_linenum;

//------------------------------------------------------------
// Buffer for line numbers (goto/gosub/for)
//------------------------------------------------------------
struct linenum_buffer {
  int line_nr;      // BASIC line number
  char *lineptr;    // program position stored by check_linenum() in mode 1
};
#define MAX_LINENUM_BUFFER_DEPTH  100   // capacity of linenum_stack
static struct linenum_buffer linenum_stack[MAX_LINENUM_BUFFER_DEPTH];
static int linenum_stack_ptr;           // number of entries currently in use
//------------------------------------------------------------

//------------------------------------------------------------
// keywords in UPPERCASE !!
// get_next_token() takes the FIRST entry whose spelling is a prefix of the
// input, so a keyword must be listed before any other keyword that is a
// prefix of it. (Each group below is ordered by keyword length; the table
// as a whole is not.)
// The array index of an entry is what exchange_keywords() writes into the
// program text (low 7 bits of the marker byte) and what get_next_token()
// and tokenizer_getkeyword() read back.
// USER : insert new keyword in the right position
//------------------------------------------------------------
static const struct keyword_token keywords[] = {
  {"RETURN", TOKENIZER_RETURN},
  {"PRINT", TOKENIZER_PRINT},
  {"GOSUB", TOKENIZER_GOSUB},
  {"THEN", TOKENIZER_THEN},
  {"ELSE", TOKENIZER_ELSE},
  {"NEXT", TOKENIZER_NEXT},
  {"GOTO", TOKENIZER_GOTO},
  {"LET", TOKENIZER_LET},
  {"FOR", TOKENIZER_FOR},
  {"END", TOKENIZER_END},
  {"IF", TOKENIZER_IF},
  {"TO", TOKENIZER_TO},
  //------------------------------------
  // NEW TOKENS BY UB
  // sorted by keyword length
  //------------------------------------
  {"RIGHT$", TOKENIZER_RIGHTSTR},
  {"STEP", TOKENIZER_STEP},
  {"REM", TOKENIZER_REM},
  {"ABS", TOKENIZER_ABS},
  //------------------------------------
  // terminator: NULL keyword, token TOKENIZER_ERROR
  {NULL, TOKENIZER_ERROR}
};

//------------------------------------------------------------
// Maps the character at ptr to its single-character token.
// Returns the TOKENIZER_xxx value, or 0 when the character is not one of
// the single-character tokens listed here.
static int singlechar(void)
{
  switch(*ptr) {
    case '\n': return TOKENIZER_CR;
    case ',':  return TOKENIZER_COMMA;
    case ';':  return TOKENIZER_SEMICOLON;
    case '+':  return TOKENIZER_PLUS;
    case '-':  return TOKENIZER_MINUS;
    case '&':  return TOKENIZER_AND;
    case '|':  return TOKENIZER_OR;
    case '^':  return TOKENIZER_XOR;
    case '*':  return TOKENIZER_ASTR;
    case '/':  return TOKENIZER_SLASH;
    case '%':  return TOKENIZER_MOD;
    case '(':  return TOKENIZER_LEFTPAREN;
    case ')':  return TOKENIZER_RIGHTPAREN;
    case '<':  return TOKENIZER_LT;
    case '>':  return TOKENIZER_GT;
    case '=':  return TOKENIZER_EQ;
    default:   return 0;
  }
}
//------------------------------------------------------------
// Returns 1 when `ascii` is a decimal digit '0'..'9', otherwise 0.
static int isdigit_ub(char ascii)
{
  return ((ascii >= '0') && (ascii <= '9')) ? 1 : 0;
}
//------------------------------------------------------------
// Converts one hexadecimal digit character to its value.
// Accepts '0'..'9', 'A'..'F' and 'a'..'f'.
// Returns 0..15, or -1 when `ascii` is not a hex digit.
static int hexdigit(char ascii)
{
  if((ascii >= '0') && (ascii <= '9')) {
    return ascii - '0';
  }
  if((ascii >= 'A') && (ascii <= 'F')) {
    return ascii - 'A' + 10;
  }
  if((ascii >= 'a') && (ascii <= 'f')) {
    return ascii - 'a' + 10;
  }
  return -1;
}
//------------------------------------------------------------
// Checks whether the text at ptr starts a hex literal: '0', then 'x' or
// 'X', then at least one hex digit.
// Returns 1 if so, otherwise 0. A later character is only read after the
// previous one has matched.
static int ishex_ub(void)
{
  if(ptr[0] != '0') {
    return 0;
  }
  if((ptr[1] != 'x') && (ptr[1] != 'X')) {
    return 0;
  }
  return (hexdigit(ptr[2]) >= 0) ? 1 : 0;
}
//------------------------------------------------------------
// Decodes a pre-parsed keyword marker at ptr: the byte 0x01 followed by a
// byte whose low 7 bits are an index into keywords[].
// On success sets nextptr behind the keyword's original text length and
// returns the keyword's token. Returns TOKENIZER_ERROR when the index does
// not name a real table entry (out of range, or the NULL terminator).
static int tk_scan_preparsed_keyword(void)
{
  const int table_len = (int)(sizeof(keywords) / sizeof(keywords[0]));
  const int idx = (*(ptr+1) & 0x7F); // clear msb

  if((idx >= table_len) || (keywords[idx].keyword == NULL)) {
    return TOKENIZER_ERROR;
  }
  nextptr = ptr + strlen(keywords[idx].keyword);
  return keywords[idx].token;
}

// Scans the number starting at ptr (caller guarantees a leading digit).
// Reads akt_token as "previous token": a decimal number directly after
// TOKENIZER_CR is a line number, and hex literals are not accepted there.
// Returns TOKENIZER_HEXNUM, TOKENIZER_LINENUM or TOKENIZER_NUMBER and sets
// nextptr, or TOKENIZER_ERROR when the number is longer than
// MAX_HEXLEN / MAX_NUMLEN allow.
static int tk_scan_number(void)
{
  int i;

  if(ishex_ub() && (akt_token != TOKENIZER_CR)) {
    // ptr[0..2] are already known to be "0x" + one hex digit
    for(i = 3; i < MAX_HEXLEN; ++i) {
      if(hexdigit(ptr[i]) < 0) {
        nextptr = ptr + i;
        return TOKENIZER_HEXNUM;
      }
    }
    return TOKENIZER_ERROR;
  }

  for(i = 0; i < MAX_NUMLEN; ++i) {
    if(!isdigit_ub(ptr[i])) {
      if(i == 0) {
        return TOKENIZER_ERROR;
      }
      nextptr = ptr + i;
      // first number in line = linenum
      return (akt_token == TOKENIZER_CR) ? TOKENIZER_LINENUM : TOKENIZER_NUMBER;
    }
  }
  return TOKENIZER_ERROR;
}

// Scans the string literal whose opening quote is at ptr.
// Returns TOKENIZER_STRING with nextptr behind the closing quote, or
// TOKENIZER_ERROR when the end of the program text is reached first
// (the scan must never run past the terminating NUL).
static int tk_scan_string(void)
{
  char *end = ptr + 1;

  while(*end != '"') {
    if(*end == 0) {
      return TOKENIZER_ERROR;
    }
    ++end;
  }
  nextptr = end + 1;
  return TOKENIZER_STRING;
}

// Returns 1 when `text` starts with `keyword`, ignoring the case of
// letters, otherwise 0. Only 'A'..'Z' have a lower-case alternative;
// other characters such as '$' must match exactly.
static int tk_match_keyword(const char *text, const char *keyword)
{
  size_t i;

  for(i = 0; keyword[i] != 0; i++) {
    char upper = keyword[i];
    char lower = upper;
    if((upper >= 'A') && (upper <= 'Z')) {
      lower = (char)(upper + 0x20);
    }
    if((text[i] != upper) && (text[i] != lower)) {
      return 0;
    }
  }
  return 1;
}

// Classifies the token that starts at ptr.
// Stores the result in akt_token and returns it; for every token except
// TOKENIZER_ENDOFINPUT and TOKENIZER_ERROR nextptr is set to the first
// character behind the token.
// While parser_mode is 0 (during pre-parse) keywords are matched by name,
// case-insensitively; afterwards keywords are only recognised through
// their 0x01 marker. All other token classes are scanned identically in
// both modes.
static int get_next_token(void)
{
  struct keyword_token const *kt;

  if(*ptr == 0) {
    akt_token=TOKENIZER_ENDOFINPUT;
    return akt_token;
  }

  if(*ptr==0x01) {
    // preparsed Keyword-Token
    akt_token=tk_scan_preparsed_keyword();
    return akt_token;
  }

  if(isdigit_ub(*ptr)) {
    akt_token=tk_scan_number();
    return akt_token;
  }

  if(singlechar()) {
    nextptr = ptr + 1;
    akt_token=singlechar();
    return akt_token;
  }

  if(*ptr == '"') {
    akt_token=tk_scan_string();
    return akt_token;
  }

  if(parser_mode==0) {
    // during pre-parse : keywords are still plain text
    for(kt = keywords; kt->keyword != NULL; ++kt) {
      if(tk_match_keyword(ptr, kt->keyword)) {
        nextptr = ptr + strlen(kt->keyword);
        akt_token=kt->token;
        return akt_token;
      }
    }
  }

  // var or stringvar
  if(*ptr >= 'a' && *ptr <= 'z') {
    nextptr = ptr + 1;
    akt_token=TOKENIZER_VARIABLE;
    // check if string
    if(*(ptr+1)=='$') {
      nextptr = ptr + 2;
      akt_token=TOKENIZER_STRINGVAR;
    }
    return akt_token;
  }

  akt_token=TOKENIZER_ERROR;
  return akt_token;
}
//------------------------------------------------------------
// Derives current_expr from current_token:
//   1 = token yields an <int>    (handled in "static int factor(void)")
//   2 = token yields a <string>  (handled in "static char *strpart(void)")
//   0 = everything else          (handled in "static void statement(void)")
// USER : add new tokens with an <int> or <string> value to the matching
//        condition below.
void tokenizer_set_expression(void)
{
  const int tok = current_token;

  // tokens with integer return values
  if((tok == TOKENIZER_VARIABLE) ||
     (tok == TOKENIZER_NUMBER) ||
     (tok == TOKENIZER_HEXNUM) ||
     (tok == TOKENIZER_MINUS) ||
     (tok == TOKENIZER_ABS)) {
    current_expr=1;
    return;
  }

  // tokens with string return values
  if((tok == TOKENIZER_STRING) ||
     (tok == TOKENIZER_STRINGVAR) ||
     (tok == TOKENIZER_RIGHTSTR)) {
    current_expr=2;
    return;
  }

  // tokens with void return values
  current_expr=0;
}
//------------------------------------------------------------
// Starts tokenizing at `program`: clears the line-number bookkeeping,
// treats the position as the start of a line (previous token = CR) and
// loads the first token together with its expression class.
void tokenizer_init(char *program)
{
  tk_akt_linenum =0;
  tk_failed =0;
  akt_token=TOKENIZER_CR;
  ptr = program;

  current_token = get_next_token();
  tokenizer_set_expression();
}
//------------------------------------------------------------
// Returns the current token (TOKENIZER_xxx) without advancing.
int tokenizer_token(void)
{
  return current_token;
}
//------------------------------------------------------------
// Returns the expression class of the current token as computed by
// tokenizer_set_expression(): 0 = statement, 1 = integer, 2 = string.
int tokenizer_is_expr(void)
{
  return current_expr;
}
//------------------------------------------------------------
// Advances to the next token: moves ptr behind the current token, skips
// blanks and re-classifies. Does nothing once tokenizer_finished() is true.
void tokenizer_next(void)
{
  if(!tokenizer_finished()) {
    for(ptr = nextptr; *ptr == ' '; ++ptr) {
      // skip blanks between tokens
    }
    current_token = get_next_token();
    tokenizer_set_expression();
  }
}
//------------------------------------------------------------
// Returns the decimal value of the text at ptr (conversion stops at the
// first non-digit).
int tokenizer_num(void)
{
  return (int)strtol(ptr, NULL, 10);
}
//------------------------------------------------------------
// Copies the contents of the current string literal (without the quotes)
// into `dest` and NUL-terminates it.
//   dest : destination buffer
//   len  : size of `dest` in bytes, including room for the terminator;
//          longer literals are truncated to len-1 characters
// `dest` is left untouched when `dest` is NULL, `len` is not positive,
// the current token is not TOKENIZER_STRING, or the closing quote is
// missing.
void tokenizer_string(char *dest, int len)
{
  char *string_end;
  int string_len;

  if((dest == NULL) || (len <= 0)) {
    return;
  }
  if(tokenizer_token() != TOKENIZER_STRING) {
    return;
  }
  string_end = strchr(ptr + 1, '"');
  if(string_end == NULL) {
    return;
  }
  string_len = (int)(string_end - ptr - 1);
  // keep one byte for the terminator so dest[string_len] stays in bounds
  if(string_len > len - 1) {
    string_len = len - 1;
  }
  memcpy(dest, ptr + 1, (size_t)string_len);
  dest[string_len] = 0;
}
//------------------------------------------------------------
// Empty in this file: the function does nothing.
void tokenizer_error_print(void)
{

}
//------------------------------------------------------------
// Returns 1 when ptr is at the terminating NUL or the current token is
// TOKENIZER_ENDOFINPUT, otherwise 0.
int tokenizer_finished(void)
{
  if(*ptr == 0) {
    return 1;
  }
  return (current_token == TOKENIZER_ENDOFINPUT) ? 1 : 0;
}
//------------------------------------------------------------
// Returns the index of the variable named by the character at ptr
// ('a' -> 0, 'b' -> 1, ...). No range check is done here.
int tokenizer_variable_num(void)
{
  const int letter = ptr[0];

  return letter - 'a';
}



//------------------------------------------------------------
// New functions by UB
//------------------------------------------------------------


//------------------------------------------------------------
// Returns the value of the hex literal ("0x....") that starts at ptr.
// Conversion stops at the first non-hex character or after
// MAX_HEXLEN-2 digits.
// The global ptr is NOT moved: it keeps pointing at the current token, so
// the function can be called repeatedly and tokenizer_finished() /
// tokenizer_next() still see the real position.
int tokenizer_hexnum(void)
{
  const char *cursor = ptr + 2; // skip '0x'
  int ret_wert = 0;
  int n;

  for(n=2;n<MAX_HEXLEN;n++) {
    const int digit = hexdigit(*cursor);
    if(digit<0) break;
    ret_wert = (ret_wert*16) + digit;
    cursor++;
  }
  return ret_wert;
}

//------------------------------------------------------------
// Runs the three pre-parse passes over `program`, restarting the
// tokenizer before each pass and once more at the end.
//   1.step : exchange_keywords()      - replace keywords by markers
//   2.step : search_goto_gosub_for()  - collect jump target line numbers
//   3.step : search_linenum()         - resolve their program positions
// Returns 0 on success, otherwise the number (1..3) of the step that did
// not return 1.
// NOTE(review): the first tokenizer_init() classifies the first token with
// whatever parser_mode a previous run left behind; exchange_keywords() only
// resets it to 0 afterwards. Confirm this is harmless for programs that
// start with a keyword.
int tokenizer_pre_parse(char *program)
{
  tokenizer_init(program);
  if(exchange_keywords(program) != 1) {
    return 1;
  }

  tokenizer_init(program);
  if(search_goto_gosub_for(program) != 1) {
    return 2;
  }

  tokenizer_init(program);
  if(search_linenum(program) != 1) {
    return 3;
  }

  tokenizer_init(program);
  return 0;
}


//------------------------------------------------------------
// Pre-parse step 1: walks all tokens and overwrites the first two
// characters of every keyword with a marker (0x01, table index | 0x80).
// Runs with parser_mode 0 and leaves parser_mode at 1.
// The tokenizer must already be positioned on the first token; `program`
// itself is not used for scanning.
// Returns 1 when the end of input was reached, 2 on TOKENIZER_ERROR.
int exchange_keywords(char *program)
{
  int status=0;
  int tok,idx,linenum;

  parser_mode=0;

  for(;;) {
    tok=tokenizer_token();
    if(tok==TOKENIZER_ERROR) {
      status=2;
      break;
    }
    if(tok==TOKENIZER_ENDOFINPUT) {
      status=1;
      break;
    }

    if(tok==TOKENIZER_LINENUM) {
      // remember the last non-zero line number
      if(tk_failed==0) {
        linenum=tokenizer_num();
        if(linenum!=0) tk_akt_linenum = linenum;
      }
    }
    else {
      // look the token up in the keyword table (ends at the terminator)
      for(idx=0;idx<100;idx++) {
        if(keywords[idx].token==tok) {
          ptr[0]=0x01;         // keyword-ID
          ptr[1]=(idx|0x80);   // set bit7 to mark as symbol
          break;
        }
        if(keywords[idx].token==TOKENIZER_ERROR) {
          break;
        }
      }
    }

    // the rest of a REM line is comment text, not tokens
    if(tok==TOKENIZER_REM) {
      tokenizer_skipline();
    }
    else {
      tokenizer_next();
    }
  }

  parser_mode=1;

  return status;
}

//------------------------------------------------------------
int search_goto_gosub_for(char *program)
{
  int akt_token=-1,ok=0,n,merker=0;

  linenum_stack_ptr=0;

  ok=0;
	merker=0;
  do {
    akt_token=tokenizer_token();
    if(akt_token==TOKENIZER_ERROR) {
      ok=2;
    }
    else if(akt_token==TOKENIZER_ENDOFINPUT) {
      ok=1;
    }
    else if(akt_token==TOKENIZER_LINENUM) {
      n=tokenizer_num();
      if((tk_failed==0) && (n!=0)) tk_akt_linenum = n;
      if(merker==1) {  // linenum after "FOR" or "GOSUB"
        if(linenum_stack_ptr<MAX_LINENUM_BUFFER_DEPTH) {
          if(check_linenum(n,0)==0) {
            linenum_stack[linenum_stack_ptr].line_nr=n;
            linenum_stack_ptr++;
          }
        }
        merker=0;
      }
    }
    else if(akt_token==TOKENIZER_GOTO) {
      tokenizer_next();
      akt_token=tokenizer_token();
      if(akt_token==TOKENIZER_NUMBER) { // save linenum from "GOTO"
        if(linenum_stack_ptr<MAX_LINENUM_BUFFER_DEPTH) {
          n=tokenizer_num();
          if(check_linenum(n,0)==0) {
            linenum_stack[linenum_stack_ptr].line_nr=n;
            linenum_stack_ptr++;
          }
        }
      }
    }
    else if(akt_token==TOKENIZER_GOSUB) {
      tokenizer_next();
      akt_token=tokenizer_token();
      if(akt_token==TOKENIZER_NUMBER) {  // save linenum from "GOSUB"
        if(linenum_stack_ptr<MAX_LINENUM_BUFFER_DEPTH) {
          n=tokenizer_num();
          if(check_linenum(n,0)==0) {
            linenum_stack[linenum_stack_ptr].line_nr=n;
            linenum_stack_ptr++;
          }
          merker=1; // save next linenum after "GOSUB"
        }
      }
    }
    else if(akt_token==TOKENIZER_FOR) {
      merker=1; // save next linenum after "FOR"
    }

    if(ok==0) {
      if(akt_token==TOKENIZER_REM) {
        tokenizer_skipline();
      }
      else {
        tokenizer_next();
      }
    }
  }
  while(ok==0);

  return ok;
}

//------------------------------------------------------------/
int search_linenum(char *program)
{
  int akt_token=-1,ok=0,n;
  int linecnt=0;

  ok=0;
  do {
    akt_token=tokenizer_token();
    if(akt_token==TOKENIZER_ERROR) {
      ok=2;
    }
    else if(akt_token==TOKENIZER_ENDOFINPUT) {
      ok=1;
    }
    else if(akt_token==TOKENIZER_LINENUM) {
      n=tokenizer_num();
      n=check_linenum(n,1);
      if(n!=0) linecnt++;

      tokenizer_skipline();
      tokenizer_next();
    }
    else {
      tokenizer_next();
    }
  }while(ok==0);

  if(linecnt!=linenum_stack_ptr) ok=3; // error : not all linenumbers found

  return ok;
}

//------------------------------------------------------------
// Looks `linenum` up in linenum_stack.
//   mode 1 : additionally store the current ptr as the position of the
//            first matching entry
//   other  : lookup only
// Returns 1 when the line number is listed, otherwise 0.
int check_linenum(int linenum, int mode)
{
  int idx=0;

  while(idx<linenum_stack_ptr) {
    if(linenum_stack[idx].line_nr==linenum) {
      if(mode==1) linenum_stack[idx].lineptr=ptr;
      return(1);
    }
    idx++;
  }

  return(0);
}

//------------------------------------------------------------
// Jumps directly to `linenum` using the positions collected during
// pre-parse: sets ptr to the stored position, treats it as the start of a
// line and loads the token found there.
// Returns 1 when the jump was done. Returns 0, without touching the
// tokenizer state, when the line number is not listed or its position was
// never resolved (lineptr is NULL).
int tokenizer_jumpfast(int linenum)
{
  int n;

  for(n=0;n<linenum_stack_ptr;n++) {
    if(linenum_stack[n].line_nr==linenum) {
      if(linenum_stack[n].lineptr==NULL) {
        // target line does not exist in the program text
        return(0);
      }
      ptr = linenum_stack[n].lineptr;
      akt_token=TOKENIZER_CR;
      current_token = get_next_token();
      tokenizer_set_expression();
      return(1);
    }
  }

  return(0);
}

//------------------------------------------------------------
// Skips to the end of the current line: starting at the last character of
// the current token, moves ptr forward to the next '\n' or to the
// terminating NUL and loads the token found there.
void tokenizer_skipline(void)
{
  for(ptr = nextptr-1; (*ptr != '\n') && (*ptr != 0x00); ++ptr) {
    // skip the rest of the line
  }
  current_token = get_next_token();
  tokenizer_set_expression();
}
//------------------------------------------------------------
// Returns the line number last recorded in tk_akt_linenum by the
// pre-parse passes (0 directly after tokenizer_init()).
int tokenizer_linenum(void)
{
  return tk_akt_linenum;
}
//------------------------------------------------------------
// Writes the spelling of a pre-parsed keyword into a caller buffer.
//   token : marker byte from the program text; only the low 7 bits
//           (index into keywords[]) are used
//   ptr   : destination buffer, must hold the keyword plus terminator
//           (this parameter hides the file-level scan pointer `ptr`)
// Returns the keyword length. When the index does not name a real table
// entry an empty string is written and 0 is returned.
int tokenizer_getkeyword(char token, char *ptr)
{
  const int table_len = (int)(sizeof(keywords) / sizeof(keywords[0]));
  int ret_wert,n,nr;

  nr=(token&0x7F); // clear MSB
  if((nr>=table_len) || (keywords[nr].keyword==NULL)) {
    ptr[0]=0x00;
    return(0);
  }

  ret_wert=(int)strlen(keywords[nr].keyword);
  for(n=0;n<ret_wert;n++) {
    ptr[n]=keywords[nr].keyword[n];
  }
  ptr[n]=0x00;

  return(ret_wert);
}
