tokenizer.js

/** @module tokenizer */

import Token from "./token/token.js"
import TextToken from "./token/texttoken.js"
import TagToken from "./token/tagtoken.js"
import Key from "./token/key.js"

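// Characters accepted in tag names and attribute values. "*" permits
// list-item style tags such as [*]; "#:/." permits values such as
// "#ff0000", "https://example.com" or "file.png".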
const ALPHABET = "abcdefghijklmnopqrstuvwxyz"
const DIGITS = "0123456789"
const CHARACTERS_SPECIAL_TAG_NAME = "*"
const CHARACTERS_SPECIAL_TAG_VALUE = "#:/."

const CHARACTERS_TAG_NAME = [...ALPHABET + CHARACTERS_SPECIAL_TAG_NAME]
const CHARACTERS_TAG_VALUE = [...ALPHABET + DIGITS + CHARACTERS_SPECIAL_TAG_VALUE]

/**
 * Splits a BBCode-style string into a flat array of tokens. Plain text is
 * emitted one character per TextToken; "[name key=value]" and "[/name]"
 * sequences become opening and closing TagTokens.
 *
 * @param   {string}  str The text input
 * @returns {Token[]}     An array of tokens
 */
function tokenize(str) {
	const characters = [...str]
	let buffer = ""
	const tokens = []
	const keys = []
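	// Scan one character at a time. `buffer` accumulates the raw characters of
	// the token being built; `keys` collects the Key (name/value) pairs of the
	// tag currently being read, and is empty while no tag is in progress.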
	for(const [index, character] of characters.entries()) {
		buffer += character
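		// Input position of the buffer's first character.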
		const bufferIndex = index - buffer.length + 1
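		// "[/" begins a potential closing tag; wait for at least one more character.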
		if (buffer[0] + buffer[1] === "[/") {
			if (buffer.length === 2) {
				continue
			}
			if (keys.length === 0) {
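				// The first character after "[/" must start a tag name; otherwise flush as text.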
				if (CHARACTERS_TAG_NAME.includes(character.toLowerCase()) === true) {
					keys.push(new Key(character, null))
				} else {
					tokens.push(new TextToken(buffer, bufferIndex))
					buffer = ""
				}
			} else if (keys.length >= 1) {
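				// Grow the tag name, emit a closing TagToken on "]", or flush as text.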
				const key = keys[keys.length - 1]
				if (CHARACTERS_TAG_NAME.includes(character.toLowerCase()) === true) {
					key._name += character
				} else if (character === "]") {
					tokens.push(new TagToken(Token.NAME.CLOSING_TAG, buffer, bufferIndex, [...keys]))
					keys.splice(0, keys.length)
					buffer = ""
				} else {
					tokens.push(new TextToken(buffer, bufferIndex))
					keys.splice(0, keys.length)
					buffer = ""
				}
			}
		} else if (buffer[0] === "[") {
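			// "[" (not followed by "/") begins a potential opening tag.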
			if (buffer.length === 1) {
				continue
			}
			if (keys.length === 0) {
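				// The first character after "[" must start a tag name; otherwise flush as text.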
				if (CHARACTERS_TAG_NAME.includes(character.toLowerCase()) === true) {
					keys.push(new Key(character, null))
				} else {
					tokens.push(new TextToken(buffer, bufferIndex))
					buffer = ""
				}
			} else if (keys.length >= 1) {
				const key = keys[keys.length - 1]
				if (typeof key.value === "string") {
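					// A non-null value means "=" has been seen: read this key's value.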
					if (key.value.length === 0) {
						if (CHARACTERS_TAG_VALUE.includes(character.toLowerCase()) === true || character === "'" || character === '"') {
							key._value += character
						} else {
							tokens.push(new TextToken(buffer, bufferIndex))
							buffer = ""
							keys.splice(0, keys.length)
						}
					} else if (key.value.length >= 1) {
						// True while an opening quote has not yet been matched by a closing one.
						const parsingQuote = (key.value[0] === "'" || key.value[0] === '"')
						&& (key.value[0] !== key.value[key.value.length - 1] || key.value.length === 1)
						// The closing quote has just been read: strip the surrounding quotes.
						if ((key.value[0] === "'" || key.value[0] === '"') && (parsingQuote === false && key.value.length > 1 && key.value[0] === key.value[key.value.length - 1])) {
							key._value = key.value.substring(1, key.value.length - 1)
						}
						if (CHARACTERS_TAG_VALUE.includes(character.toLowerCase()) === true || parsingQuote) {
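							// Quoted values accept any character; unquoted ones only CHARACTERS_TAG_VALUE.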
							key._value += character
						} else if (character === "]") {
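							// "]" completes the opening tag; emit it with all collected keys.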
							tokens.push(new TagToken(Token.NAME.OPENING_TAG, buffer, bufferIndex, [...keys]))
							keys.splice(0, keys.length)
							buffer = ""
						} else if (character === " ") {
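							// A space ends this value and starts a new, empty key.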
							keys.push(new Key("", null))
						} else {
							tokens.push(new TextToken(buffer, bufferIndex))
							buffer = ""
							keys.splice(0, keys.length)
						}
					}
				} else if (CHARACTERS_TAG_NAME.includes(character.toLowerCase()) === true) {
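					// No "=" yet: keep growing the current key's name.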
					key._name += character
				} else if (character === "]") {
					tokens.push(new TagToken(Token.NAME.OPENING_TAG, buffer, bufferIndex, [...keys]))
					keys.splice(0, keys.length)
					buffer = ""
				} else if (character === "=") {
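					// "=" switches this key from reading its name to reading its value.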
					key._value = ""
				} else if (key.name.length >= 1 && character === " ") {
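					// A space after a named key starts another key with no value.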
					keys.push(new Key("", null))
				} else {
					tokens.push(new TextToken(buffer, bufferIndex))
					buffer = ""
					keys.splice(0, keys.length)
				}
			}
		} else {
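			// Not inside a tag: emit the character as its own TextToken.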
			tokens.push(new TextToken(buffer, bufferIndex))
			buffer = ""
		}
	}
	// Flush any unterminated remainder (e.g. "[b" with no closing "]") as
	// plain text so that no input characters are silently dropped.
	if (buffer.length > 0) {
		tokens.push(new TextToken(buffer, characters.length - buffer.length))
	}
	return tokens
}
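
// A sketch of the expected output (exact fields depend on the Token classes
// imported above):
//
//   tokenize("[b]hi[/b]")
//   // → TagToken(OPENING_TAG, "[b]", 0, [Key("b", null)])
//   //   TextToken("h", 3), TextToken("i", 4)  (plain text: one token per character)
//   //   TagToken(CLOSING_TAG, "[/b]", 5, [Key("b", null)])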

export {
	tokenize
}