# analyzer.py
import numpy as np
import pandas as pd
from XMLGrammar.LexGrammarBuilder import LexGrammarBuilder
from XMLGrammar.LexParser import LexParser
def _object_column(items):
    """Return a 1-D object array whose i-th cell holds ``items[i]`` unchanged.

    ``np.array(items)`` inspects nested sequences: variable-length entries
    raise ``ValueError`` on NumPy >= 1.24, and equal-length entries produce a
    2-D array that cannot be used as a single DataFrame column.  Filling a
    pre-allocated object array cell by cell always yields shape ``(n,)``.
    """
    column = np.empty(len(items), dtype=object)
    for idx, item in enumerate(items):
        column[idx] = item
    return column


# Build a lexer from the project's "java" lexical grammar.
builder = LexGrammarBuilder("java")
lexGram = builder.read()
lexer = LexParser(lexGram)

# The input CSV must provide a 'source' column (text fed to the lexer) and a
# 'label' column (expected values: "clean" / "smell").
df = pd.read_csv('magicvalue.csv')

filets = []  # per-row lexer.getInStream() results (presumably token streams)
filesc = []  # per-row lexer.getInSource() results (presumably source text)
for i, line in enumerate(df['source'], start=1):
    # Clear the lexer's state from the previous row before parsing this one.
    lexer.resetTokens()
    lexer.parse(line)
    filets.append(lexer.getInStream())
    filesc.append(lexer.getInSource())
    print(i)  # progress: number of rows processed so far

# Encode the textual labels as integers: clean -> 0, smell -> 1.
label = {"label": {"clean": 0, "smell": 1}}
df.replace(label, inplace=True)

# Write the token-stream dataset.
tokenstream = _object_column(filets)
dfts = pd.DataFrame(
    {'label': df['label'], 'source': tokenstream},
    columns=['label', 'source'],
)
dfts.to_csv("ts.csv")
print(tokenstream.shape)

# Write the source-code dataset.
sourcecode = _object_column(filesc)
dfsc = pd.DataFrame(
    {'label': df['label'], 'source': sourcecode},
    columns=['label', 'source'],
)
dfsc.to_csv("sc.csv")
print(sourcecode.shape)