// Builds the keyword lookup table mapping a keyword's text to its TokenType.
// NOTE(review): the embedded line numbers skip (29 -> 31), so the loop header
// that introduces `mapping` is not visible here; presumably it iterates the
// token-mapping table -- confirm.
28std::map<std::string, TokenType> build_keywords_map() {
29 std::map<std::string, TokenType> map;
// Keep only entries longer than one character whose first character is
// alphabetic; everything else is rejected by this filter.
31 if (mapping.str.length() > 1 && (std::isalpha(mapping.str[0]) != 0)) {
// std::map::emplace never overwrites: if the same string appears twice, the
// first inserted entry is the one that stays.
32 map.emplace(mapping.str, mapping.type);
// Operator lookup table: a character mapped to a list of TokenMapping entries.
38using OpMap = std::map<char, std::vector<TokenMapping>>;
// Builds the operator table from the entries that do NOT start with a letter.
// NOTE(review): the declaration of `map`, the loop header that introduces
// `mapping`, and the return statement are on lines not shown in this excerpt.
40OpMap build_operator_map() {
// Non-empty entries whose first character is non-alphabetic are bucketed
// under that first character.
43 if (!mapping.str.empty() && (std::isalpha(mapping.str[0]) == 0)) {
44 map[mapping.str[0]].push_back(mapping);
// Second pass: reorder every bucket in place.
49 for (
auto& pair : map) {
50 std::ranges::sort(pair.second,
// Comparator: entries with longer strings sort first. Presumably this makes a
// first-match search prefer the longest operator -- confirm against the lookup
// site.
52 return a.str.length() > b.str.length();
// Out-of-class definition of the static table Tokenizer::keywords.
// NOTE(review): its initializer is on a line not shown in this excerpt;
// presumably build_keywords_map() -- confirm.
60const std::map<std::string, TokenType> Tokenizer::keywords =
// Translation-unit-local operator table, initialized from build_operator_map().
62static const OpMap operator_map = build_operator_map();
// Token-collecting loop. NOTE(review): the enclosing function header is not
// visible in this excerpt; presumably this is the body of Tokenizer::tokenize
// -- confirm.
67 std::vector<Token> tokens;
// Keep requesting tokens while peek() does not report '\0'.
68 while (peek() !=
'\0') {
69 tokens.push_back(nextToken());
// Append an explicit EndOfFile token to the list.
71 tokens.push_back(makeToken(TokenType::EndOfFile));
// Produces the next token starting at the current position.
// NOTE(review): the embedded line numbers skip throughout this function, so
// several conditions and branch bodies (including where `c` is read) are not
// visible here; comments below describe only the visible lines.
75Token Tokenizer::nextToken() {
// Runs the whitespace/comment skipper before looking at the next character.
76 skipWhitespaceAndComments();
// Record the column where this token begins; makeToken copies it into the
// token's range.start.column.
79 start_column = column;
// Visible return paths for EndOfFile and Newline tokens (their guarding
// conditions are on lines not shown).
81 return makeToken(TokenType::EndOfFile);
88 return makeToken(TokenType::Newline);
// Branch for a letter or underscore (body not shown; presumably dispatches to
// identifier() -- confirm).
// NOTE(review): if `c` is a plain char, passing a negative value (non-ASCII
// byte) to std::isalpha/std::isdigit is undefined behavior; confirm the input
// is ASCII or that a cast to unsigned char happens elsewhere.
91 if ((std::isalpha(c) != 0) || c ==
'_') {
// Branch for a decimal digit (body not shown; presumably dispatches to
// number() -- confirm).
94 if (std::isdigit(c) != 0) {
// When the character after the current one is a letter or underscore, loop
// over the following run of alphanumerics/underscores and return an
// Identifier token. The enclosing condition and the loop body are not visible.
98 if ((std::isalpha(peek(1)) != 0) || peek(1) ==
'_') {
100 while ((std::isalnum(peek()) != 0) || peek() ==
'_') {
103 return makeToken(TokenType::Identifier);
// Compares the source text at `current`, taking as many characters as the
// literal "<global" has, against a value on a line not shown; on a match the
// token is produced by globalDeclaration().
108 if (source.substr(current, std::string(
"<global").length()) ==
110 return globalDeclaration();
// Operator lookup: only attempted when operator_map has a bucket for `c`.
114 if (operator_map.contains(c)) {
115 const auto& possible_tokens = operator_map.at(c);
// find_if yields the FIRST candidate in bucket order whose predicate holds;
// the predicate compares the source text at `current` (same length as the
// candidate's string) with a value on a line not shown.
117 std::ranges::find_if(possible_tokens, [&](
const auto& mapping) {
118 return source.substr(current, mapping.str.length()) ==
// On a hit, move `current` forward by the matched string's length and return a
// token of the matched type.
121 if (it != possible_tokens.end()) {
122 current += it->str.length();
123 return makeToken(it->type);
// Fallback: an Invalid token whose value is the single character `c`.
128 return makeToken(TokenType::Invalid, std::string(1, c));
// Skips whitespace and comments, per its name.
// NOTE(review): almost all of the body is on lines not shown in this excerpt.
131void Tokenizer::skipWhitespaceAndComments() {
// Loop that continues until peek() returns a newline or '\0'; the terminating
// character itself does not satisfy the condition. Presumably this consumes
// the rest of a line comment -- confirm against the omitted lines.
141 while (peek() !=
'\n' && peek() !=
'\0') {
// Returns the character `offset` positions past `current`. Declared const, so
// it cannot modify the tokenizer's non-mutable state.
151char Tokenizer::peek(
int offset)
const {
// Bounds check against the source length; the value returned for an
// out-of-range position is on a line not shown (callers in this file compare
// the result against '\0').
// NOTE(review): `offset` is a signed int while source.length() is unsigned. If
// `current` is an unsigned type, a negative offset would wrap in this
// comparison -- confirm no caller passes one.
152 if (current + offset >= source.length()) {
155 return source[current + offset];
// Steps over one character and returns a character from the source.
// NOTE(review): the statements that update `current` (and any line/column
// bookkeeping) are on lines not shown in this excerpt.
158char Tokenizer::advance() {
// Only acts while `current` is inside the source.
159 if (current < source.length()) {
// A newline at the current position gets special handling (body not shown;
// presumably line/column tracking -- confirm).
160 if (source[current] ==
'\n') {
// Returns the character immediately before `current`.
// NOTE(review): if this line can be reached with current == 0 (e.g. empty
// source), the index underflows -- confirm from the omitted lines.
168 return source[current - 1];
// Builds a Token of the given `type`, attaching a source range and a text
// value. `value` optionally overrides the token text (see below).
// NOTE(review): the declaration of `range` and the remaining fields of the
// returned initializer are on lines not shown in this excerpt.
171Token Tokenizer::makeToken(
TokenType type,
const std::string& value)
const {
// The range begins at the recorded start line/column and its end line is the
// current line.
173 range.start.line = start_line;
174 range.start.column = start_column;
175 range.end.line = line;
179 return {.type = type,
// An empty `value` means "use the source text from `start` up to `current`";
// a non-empty `value` is used as given.
181 value.empty() ? source.substr(start, current - start) : value,
// Scans an identifier and classifies it as either a keyword or a plain
// Identifier token.
185Token Tokenizer::identifier() {
// Loop over the run of alphanumerics/underscores (loop body not shown;
// presumably it advances past each character -- confirm).
186 while ((std::isalnum(peek()) != 0) || peek() ==
'_') {
// The lexeme is the source text from `start` up to `current`.
189 std::string text = source.substr(start, current - start);
// If the lexeme is present in the `keywords` table, the token takes the
// table's type.
190 auto it = keywords.find(text);
191 if (it != keywords.end()) {
192 return makeToken(it->second);
// Not a keyword: plain identifier.
194 return makeToken(TokenType::Identifier);
// Scans a numeric literal.
// NOTE(review): the declaration/assignment of `is_hex`, all loop bodies, and
// the final return are on lines not shown in this excerpt.
197Token Tokenizer::number() {
// Detects a "0x" / "0X" prefix at the current position (presumably this is
// where `is_hex` gets set -- confirm).
199 if (peek() ==
'0' && (peek(1) ==
'x' || peek(1) ==
'X')) {
// Main digit run: decimal digits always qualify; hex digits qualify only when
// `is_hex` is true.
205 while ((std::isdigit(peek()) != 0) ||
206 (is_hex && (std::isxdigit(peek()) != 0))) {
// Fractional part: a '.' is only accepted when a decimal digit follows it.
// The fraction loop below checks decimal digits only, regardless of `is_hex`.
210 if (peek() ==
'.' && (std::isdigit(peek(1)) != 0)) {
212 while (std::isdigit(peek()) != 0) {
// Exponent for hex literals: marker 'p'/'P', then a check for '+' or '-',
// then a run of decimal digits.
217 if (is_hex && (peek() ==
'p' || peek() ==
'P')) {
219 if (peek() ==
'+' || peek() ==
'-') {
222 while (std::isdigit(peek()) != 0) {
225 }
else if (!is_hex && (peek() ==
'e' || peek() ==
'E')) {
// Exponent for non-hex literals (marker 'e'/'E'): same shape -- a check for
// '+' or '-', then a run of decimal digits.
227 if (peek() ==
'+' || peek() ==
'-') {
230 while (std::isdigit(peek()) != 0) {
// Scans a global declaration; reached from nextToken's "<global" check.
// NOTE(review): the initialization of `depth`, the reading of `c`, and the
// branch bodies are on lines not shown in this excerpt.
238Token Tokenizer::globalDeclaration() {
// Loop while `depth` is positive and peek() does not report '\0'. One visible
// branch tests for '>' (presumably it lowers `depth`, with a matching branch
// raising it -- confirm).
241 while (depth > 0 && peek() !=
'\0') {
245 }
else if (c ==
'>') {
// One path returns a Global token (its guarding condition is not shown).
252 return makeToken(TokenType::Global);
// The other path returns an Invalid token whose value is the source text from
// `start` up to `current`.
254 return makeToken(TokenType::Invalid, source.substr(start, current - start));
std::vector< Token > tokenize()
Tokenizer(std::string source)
constexpr std::array TOKEN_MAPPINGS