This repository has been archived by the owner on Nov 26, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
240 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
namespace Lexer | ||
{ | ||
/// <summary>
/// Classification assigned to a single character.
/// NOTE(review): presumably used while parsing regex patterns — confirm against the parser.
/// </summary>
public enum CharType
{
    /// <summary>Default member (numeric value 0).</summary>
    Invalid = 0,

    /// <summary>Second member (numeric value 1).</summary>
    Special = 1,

    /// <summary>Third member (numeric value 2).</summary>
    Literal = 2
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Reads one token at a time from an input string.
/// </summary>
public interface ILexer
{
    /// <summary>
    /// Attempts to read a single token from <paramref name="input"/> starting at
    /// position <paramref name="index"/>.
    /// </summary>
    /// <param name="input">The text to tokenize.</param>
    /// <param name="index">Zero-based position in <paramref name="input"/> where reading starts.</param>
    /// <param name="token">Receives the token that was read when the call succeeds.</param>
    /// <returns><c>true</c> when a token was read; otherwise <c>false</c>.</returns>
    bool Read(
        string input,
        int index,
        out IToken token);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
namespace Lexer | ||
{ | ||
/// <summary>
/// A piece of text recognized by a lexer, together with its type and position.
/// </summary>
public interface IToken
{
    /// <summary>The token type that produced this token.</summary>
    ITokenType TokenType { get; }

    /// <summary>The text of the token.</summary>
    string Value { get; }

    /// <summary>The position associated with the token in the input it was read from.</summary>
    int Index { get; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
using Lexer.Automaton; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Describes one kind of token: its name, the automaton that recognizes it,
/// and a priority value.
/// </summary>
public interface ITokenType
{
    /// <summary>Display name of the token type.</summary>
    string Name { get; }

    /// <summary>Priority value of the token type.</summary>
    int Priority { get; }

    /// <summary>Automaton associated with this token type.</summary>
    IAutomaton Automaton { get; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
using Lexer.Automaton; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Lexer that recognizes tokens by running the automaton of each configured
/// token type over the input.
/// </summary>
public class Lexer : ILexer
{
    private readonly ITokenType[] types;

    /// <summary>
    /// Creates a lexer over a snapshot of the given token types.
    /// </summary>
    /// <param name="types">Token types to recognize; copied into an array on construction.</param>
    public Lexer(IEnumerable<ITokenType> types)
    {
        this.types = types.ToArray();
    }

    /// <summary>
    /// Attempts to read one token from <paramref name="input"/> starting at
    /// <paramref name="index"/>. Token types are tried in ascending
    /// <see cref="ITokenType.Priority"/> order and the first type that matches wins;
    /// for that type the longest accepted, non-empty prefix is returned.
    /// </summary>
    /// <param name="input">The text to tokenize.</param>
    /// <param name="index">Zero-based start position inside <paramref name="input"/>.</param>
    /// <param name="token">The token that was read, or <c>null</c> when no type matched.</param>
    /// <returns><c>true</c> when a token was produced; otherwise <c>false</c>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="index"/> is negative.</exception>
    public bool Read(string input, int index, out IToken token)
    {
        if (input == null)
        {
            throw new System.ArgumentNullException(nameof(input));
        }
        if (index < 0)
        {
            throw new System.ArgumentOutOfRangeException(nameof(index));
        }

        foreach (var type in types.OrderBy(tt => tt.Priority))
        {
            var automaton = type.Automaton;
            var states = new HashSet<int>() { automaton.StartState };

            // Length of the longest prefix seen so far that left the automaton in an
            // accepting state. Remembering it lets us fall back to an earlier match
            // when the automaton dies (or the input ends) in a non-accepting state.
            var matchedLength = 0;
            var consumed = 0;

            while (states.Count > 0 && index + consumed < input.Length)
            {
                states = new HashSet<int>(automaton.Step(states, input[index + consumed]));
                consumed++;
                if (states.Any(s => automaton.AcceptingStates.Contains(s)))
                {
                    matchedLength = consumed;
                }
            }

            // Empty matches are rejected on purpose: a zero-length token consumes no
            // input, would shadow every lower-priority type, and would stall callers
            // that advance by the token's length.
            if (matchedLength > 0)
            {
                token = new Token(type, input.Substring(index, matchedLength), index);
                return true;
            }
        }

        token = null;
        return false;
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Convenience extensions for <see cref="ILexer"/>.
/// </summary>
public static class LexerExtensions
{
    /// <summary>
    /// Lazily tokenizes the whole of <paramref name="input"/> by repeatedly calling
    /// <see cref="ILexer.Read(string, int, out IToken)"/> and advancing by the length
    /// of each returned token's value.
    /// </summary>
    /// <param name="lexer">The lexer used to read tokens.</param>
    /// <param name="input">The text to tokenize.</param>
    /// <returns>The tokens in input order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="lexer"/> or <paramref name="input"/> is <c>null</c> (thrown at call time).
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown during enumeration when the lexer cannot read a token before the end of
    /// the input, or when it returns an empty token that would prevent any progress.
    /// </exception>
    public static IEnumerable<IToken> Read(this ILexer lexer, string input)
    {
        // Validate eagerly: an iterator body would defer these checks until the
        // first MoveNext, far away from the faulty call site.
        if (lexer == null)
        {
            throw new ArgumentNullException(nameof(lexer));
        }
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        return ReadIterator(lexer, input);
    }

    // Iterator part of Read; assumes its arguments were already validated.
    private static IEnumerable<IToken> ReadIterator(ILexer lexer, string input)
    {
        var index = 0;
        IToken token;
        while (index < input.Length && lexer.Read(input, index, out token))
        {
            // A zero-length token would leave index unchanged and loop forever.
            if (token.Value.Length == 0)
            {
                throw new InvalidOperationException(
                    "Lexer returned an empty token at index " + index + "; cannot advance.");
            }

            yield return token;
            index += token.Value.Length;
        }

        if (index < input.Length)
        {
            throw new InvalidOperationException("EOF not reached!");
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
using Lexer.Automaton; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Static entry point that turns a pattern string into an automaton by
/// delegating to a shared <see cref="IRegexParser"/> instance.
/// </summary>
public static class Regex
{
    // Single parser instance shared by all Parse calls.
    private static IRegexParser sharedParser = new RegexParser();

    /// <summary>
    /// Parses <paramref name="input"/> with the shared parser.
    /// </summary>
    /// <param name="input">The pattern text handed to the parser.</param>
    /// <returns>The automaton returned by the parser.</returns>
    public static IAutomaton Parse(string input) => sharedParser.Parse(input);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace Lexer | ||
{ | ||
/// <summary>
/// Immutable <see cref="IToken"/> implementation holding a token's type, text and index.
/// </summary>
public class Token : IToken
{
    /// <summary>
    /// Creates a token from its type, text and index.
    /// </summary>
    /// <param name="type">The token type.</param>
    /// <param name="value">The token text.</param>
    /// <param name="index">The index stored with the token.</param>
    public Token(ITokenType type, string value, int index)
    {
        TokenType = type;
        Value = value;
        Index = index;
    }

    /// <summary>The token type.</summary>
    public ITokenType TokenType { get; }

    /// <summary>The token text.</summary>
    public string Value { get; }

    /// <summary>The index stored with the token.</summary>
    public int Index { get; }

    /// <summary>
    /// Formats the token as its quoted value followed by a colon and the type name.
    /// </summary>
    public override string ToString() => $"\"{Value}\": {TokenType.Name}";
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
using Lexer.Automaton; | ||
|
||
namespace Lexer | ||
{ | ||
/// <summary>
/// Immutable <see cref="ITokenType"/> implementation: a name, an automaton and a priority.
/// </summary>
public class TokenType : ITokenType
{
    /// <summary>
    /// Creates a token type.
    /// </summary>
    /// <param name="name">Display name of the token type.</param>
    /// <param name="automaton">Automaton associated with the token type.</param>
    /// <param name="priority">Priority value of the token type.</param>
    public TokenType(string name, IAutomaton automaton, int priority)
    {
        this.Priority = priority;
        this.Automaton = automaton;
        this.Name = name;
    }

    /// <summary>Display name of the token type.</summary>
    public string Name { get; }

    /// <summary>Priority value of the token type.</summary>
    public int Priority { get; }

    /// <summary>Automaton associated with the token type.</summary>
    public IAutomaton Automaton { get; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
using Lexer; | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Tests.UnitTests | ||
{ | ||
/// <summary>
/// Unit tests for the lexer.
/// </summary>
[TestClass]
public class LexerTests
{
    /// <summary>
    /// Tokenizes a short mixed input with NUMBER, ID and SPACE token types and
    /// verifies the resulting token sequence (type names, values and start indices).
    /// </summary>
    [TestMethod]
    public void TestLexer()
    {
        // Arrange
        var typeNumbers = new TokenType("NUMBER", Regex.Parse("\\d+"), 1);
        var typeId = new TokenType("ID", Regex.Parse("[a-zA-Z_][a-zA-Z0-9_]*"), 1);
        var typeWhitespace = new TokenType("SPACE", Regex.Parse("\\s"), 1);
        var lexer = new Lexer.Lexer(new[]
        {
            typeNumbers,
            typeId,
            typeWhitespace
        });

        // Act: materialize once so the sequence can be both printed and asserted on.
        var tokens = lexer.Read("Hallo Du 3").ToList();
        foreach (var token in tokens)
        {
            Console.WriteLine(token);
        }

        // Assert
        CollectionAssert.AreEqual(
            new[] { "ID", "SPACE", "ID", "SPACE", "NUMBER" },
            tokens.Select(t => t.TokenType.Name).ToArray());
        CollectionAssert.AreEqual(
            new[] { "Hallo", " ", "Du", " ", "3" },
            tokens.Select(t => t.Value).ToArray());
        CollectionAssert.AreEqual(
            new[] { 0, 5, 6, 8, 9 },
            tokens.Select(t => t.Index).ToArray());
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.