diff --git a/data/convert.py b/data/convert.py
new file mode 100644
index 00000000..2412a5b1
--- /dev/null
+++ b/data/convert.py
@@ -0,0 +1,144 @@
+
+import json
+
+
+letter_dict = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+
+domain = ['entity']
+
+
+def num2letter(num):
+    num = int(num)
+    result = ''
+    while True:
+        div = num // 26
+        mod = num % 26
+        result = letter_dict[mod] + result
+        num = div
+        if div == 0:
+            break
+
+    return result
+
+
+def load(file, pred_num):
+    fi = open(file, 'r')
+    preds = []
+    evids = []
+    for line in fi:
+        line = line.strip()
+        linelist = line.split('\t')
+        sub = 'Ent_' + num2letter(linelist[0])
+        pred = 'pred_' + num2letter(linelist[1])
+        obj = 'Ent_' + num2letter(linelist[2])
+
+        pred_inv = 'pred_' + num2letter(int(linelist[1]) + int(pred_num))
+
+        if pred not in preds:
+            preds.append(pred)
+            preds.append(pred_inv)
+
+        evids.append(pred + '(' + sub + ', ' + obj + ')')
+        evids.append(pred_inv + '(' + obj + ', ' + sub + ')')
+
+    fi.close()
+    return preds, evids
+
+
+def load_rules(file):
+    tlogc_rules = json.load(open(file, 'r'))
+
+    formulas = []
+    for hr in tlogc_rules:
+        for rule in tlogc_rules[hr]:
+            formula_str = rule2formula(rule)
+            formulas.append([rule['conf'], formula_str])
+
+    return formulas
+
+
+def load_learnt_rules(file):
+    weighted_rules = json.load(open(file, 'r'))
+    formulas = []
+    for rule in weighted_rules:
+        formula_str = rule2formula(rule)
+        formulas.append([rule['conf'], formula_str])
+
+    return formulas
+
+
+def rule2formula(rule):
+    i = 0
+    formula = ''
+    sub0 = ''
+    obj0 = ''
+    for rel in rule['body_rels']:
+        pred = 'pred_' + num2letter(rel)
+        sub = 'var_' + letter_dict[i]
+        i += 1
+        obj = 'var_' + letter_dict[i]
+        if formula != '':
+            formula += '^'
+        else:
+            sub0 = sub
+        formula += pred + '(' + sub + ', ' + obj + ')'
+        obj0 = obj
+
+    pred = 'pred_' + num2letter(rule['head_rel'])
+    formula += '=>' + pred + '(' + sub0 + ', ' + obj0 + ')'
+    return formula
+
+
+predicates, train = load('icews14/origin/train.del', 230)
+
+preds, test = load('icews14/origin/test.del', 230)
+
+for pred in preds:
+    if pred not in predicates:
+        predicates.append(pred)
+
+for i in range(len(predicates)):
+    predicates[i] = predicates[i] + '(entity, entity)'
+
+# mln with unlearnt weight
+formulas = load_rules('icews14/origin/tlogic_rules.json')
+
+fo = open('icews14/mlns/icews14.mln', 'w')
+fo.write('//Predicates\n')
+for pred in predicates:
+    fo.write(pred + '\n')
+fo.write('\n//Formulas\n')
+for formula in formulas:
+    fo.write(str(formula[0]) + ' ' + formula[1] + '\n')
+
+fo.close()
+
+# mln with learnt weight from MLN from tLogicNet
+formulas = load_learnt_rules('icews14/rule.txt_0.85_1000_0.03_f2.formula')
+
+fo = open('icews14/mlns/learnt.tlogicnet.train.icews14.mln', 'w')
+fo.write('//Predicates\n')
+for pred in predicates:
+    fo.write(pred + '\n')
+fo.write('\n//Formulas\n')
+for formula in formulas:
+    fo.write(str(formula[0]) + ' ' + formula[1] + '\n')
+
+fo.close()
+
+# train data
+fo = open('icews14/dbs/icews14_train.db', 'w')
+for evid in train:
+    fo.write(evid + '\n')
+
+fo.close()
+
+# test data
+fo = open('icews14/dbs/icews14_test.db', 'w')
+for evid in test:
+    fo.write(evid + '\n')
+
+fo.close()
+
+
+
diff --git a/examples/icews14/icews14.pracmln b/examples/icews14/icews14.pracmln
new file mode 100644
index 00000000..5d7d44c8
Binary files /dev/null and b/examples/icews14/icews14.pracmln differ
diff --git a/python3/pracmln/mln/base.py b/python3/pracmln/mln/base.py
index 9da4b024..1e4a94fa 100644
--- a/python3/pracmln/mln/base.py
+++ b/python3/pracmln/mln/base.py
@@ -28,7 +28,7 @@
 
 from ..logic import FirstOrderLogic, FuzzyLogic
-import platform
+import platform,json
 from .mrf import MRF
 from .errors import MLNParsingError
 from pyparsing import ParseException
@@ -76,14 +76,17 @@ class MLN(object):
     def __init__(self, logic='FirstOrderLogic', grammar='PRACGrammar', mlnfile=None):
         # instantiate the logic and grammar
         logic_str = '%s("%s", self)' % (logic, grammar)
+        # logic_str 是一行 python 代码,eval() 可以执行一行代码然后把结果返回给 self.logic
        self.logic = eval(logic_str)
+        # print(type(self.logic))
         logger.debug('Creating MLN with %s syntax and %s semantics' % (grammar, logic))
-        self._predicates = {}  # maps from predicate name to the predicate instance
-        self.domains = {}  # maps from domain names to list of values
-        self._formulas = []  # list of MLNFormula instances
-        self.domain_decls = []
-        self.weights = []
-        self.fixweights = []
+        self._predicates = {}  # maps from predicate name to the predicate instance,类定义为 pracmln.mln.mlnpreds.Predicate
+        self.domains = {}  # maps from domain names to list of values 所谓的 domain(域) 实际上就是定义(类型)空间,
+        # 比如实体的类型有['演员','电影','导演'],这个就是 KG 的 domains。
+        self._formulas = []  # list of MLNFormula instances, 类定义为 pracmln.logic.fol.FirstOrderLogic.Implication
+        self.domain_decls = []  ## ?
+        self.weights = []  # 权重,初始为 0
+        self.fixweights = []  # 固定权重 True/False
         self.vars = {}
         self._unique_templvars = []
         self._probreqs = []
@@ -92,6 +95,7 @@ def __init__(self, logic='FirstOrderLogic', grammar='PRACGrammar', mlnfile=None)
         if mlnfile is not None:
             MLN.load(mlnfile, logic=logic, grammar=grammar, mln=self)
             return
+        self.closedWorldPreds = []
         self.formulaGroups = []
         self.templateIdx2GroupIdx = {}
@@ -517,7 +521,8 @@ def iter_formulas_printable(self):
             yield "%-10.6f\t%s" % (f.weight, fstr(f))
         else:
             yield "%s\t%s" % (str(f.weight), fstr(f))
-
+
+
     @staticmethod
     def load(files, logic='FirstOrderLogic', grammar='PRACGrammar', mln=None):
         '''
@@ -537,6 +542,7 @@ def load(files, logic='FirstOrderLogic', grammar='PRACGrammar', mln=None):
         for f in files:
             if isinstance(f, str):
                 p = mlnpath(f)
+                # print(p.content), 输入加载的内容,Evidence、Query,Rules
                 if p.project is not None:
                     projectpath = p.projectloc
                     text += p.content
@@ -544,6 +550,7 @@ def load(files, logic='FirstOrderLogic', grammar='PRACGrammar', mln=None):
                 text += f.content
             else:
                 raise Exception('Unexpected file specification: %s' % str(f))
         dirs = [os.path.dirname(fn) for fn in files]
+        # print(mln) # MLN self
         return parse_mln(text, searchpaths=dirs, projectpath=projectpath, logic=logic, grammar=grammar, mln=mln)
     raise Exception('No mln files given.')
@@ -687,7 +694,7 @@ def parse_mln(text, searchpaths=['.'], projectpath=None, logic='FirstOrderLogic'
         if m is None:
             raise MLNParsingError("Variable assigment malformed: %s" % line)
         mln.vars[m.group(1)] = "%s" % m.group(2).strip()
-        continue
+        continue
     # predicate decl or formula with weight
     else:
         isHard = False
diff --git a/python3/pracmln/mlnlearn.py b/python3/pracmln/mlnlearn.py
index b8afe37c..a03e490c 100644
--- a/python3/pracmln/mlnlearn.py
+++ b/python3/pracmln/mlnlearn.py
@@ -29,6 +29,7 @@
 import fnmatch
 import io
 import pstats
+import sys
 import tkinter.messagebox
 import traceback
 from cProfile import Profile
@@ -361,7 +362,7 @@ def run(self):
         if self.verbose:
             print(('loaded %d database(s).' % len(dbs)))
 
-        watch = StopWatch()
+        watch = StopWatch()  # 一个计时器
 
         if self.verbose:
             confg = dict(self._config)
@@ -375,14 +376,14 @@ def run(self):
         params = dict([(k, getattr(self, k)) for k in (
             'multicore', 'verbose', 'profile', 'ignore_zero_weight_formulas')])
 
-        # for discriminative learning
+        # for discriminative learning todo method 初传化 learner 的时候传的是串,什么时候变成对象的?
         if issubclass(self.method, DiscriminativeLearner):
             if self.discr_preds == QUERY_PREDS:  # use query preds
                 params['qpreds'] = self.qpreds
             elif self.discr_preds == EVIDENCE_PREDS:  # use evidence preds
                 params['epreds'] = self.epreds
 
-        # gaussian prior settings
+        # gaussian prior settings todo 如果使用先验
         if self.use_prior:
             params['prior_mean'] = self.prior_mean
             params['prior_stdev'] = self.prior_stdev
@@ -400,7 +401,7 @@ def run(self):
         logger.level = eval('logs.%s' % params.get('debug', 'WARNING').upper())
         mlnlearnt = None
         try:
-            # run the learner
+            # run the learner, most expensive part
             mlnlearnt = mln.learn(dbs, self.method, **params)
             if self.verbose:
                 print()
diff --git a/python3/pracmln/mlnquery.py b/python3/pracmln/mlnquery.py
index f6309a2b..dff8d730 100644
--- a/python3/pracmln/mlnquery.py
+++ b/python3/pracmln/mlnquery.py
@@ -30,8 +30,8 @@
 import ntpath
 import traceback
 
 from tkinter import Frame, BOTH, Label, Button, OptionMenu, IntVar, Checkbutton, \
-    W, E, Entry, messagebox, END, DISABLED, NORMAL, Tk
-from tkinter.filedialog import askopenfilename, asksaveasfilename, StringVar
+    W, E, Entry, messagebox, END, DISABLED, NORMAL, Tk, StringVar
+from tkinter.filedialog import askopenfilename, asksaveasfilename
 
 from dnutils import logs, ifnone, out
diff --git a/python3/pracmln/utils/project.py b/python3/pracmln/utils/project.py
index 50242d56..8b889fff 100644
--- a/python3/pracmln/utils/project.py
+++ b/python3/pracmln/utils/project.py
@@ -295,9 +295,9 @@ def convert(data):
     """
     if isinstance(data, str):
         return str(data)
-    elif isinstance(data, collections.Mapping):
+    elif isinstance(data, collections.abc.Mapping):
         return dict(list(map(convert, iter(data.items()))))
-    elif isinstance(data, collections.Iterable):
+    elif isinstance(data, collections.abc.Iterable):
         return type(data)(list(map(convert, data)))
     else:
         return data
@@ -470,10 +470,12 @@ def content(self):
         """
         path = self.resolve_path()
         if self.project is not None:
+            # MLNProject
            proj = MLNProject.open(os.path.join(self.resolve_path(), self.project))
             if self.file is None:
                 return proj
             fileext = self.file.split('.')[-1]
+            # print(fileext) # mln/db
             if fileext == 'mln':
                 mln = proj.mlns.get(self.file)
                 if mln is None: raise Exception('Project %s does not contain and MLN named %s' % (self.project, self.file))
diff --git a/python3/sc_test.py b/python3/sc_test.py
new file mode 100644
index 00000000..daa341b3
--- /dev/null
+++ b/python3/sc_test.py
@@ -0,0 +1,119 @@
+"""
+Created on Oct 28, 2015
+
+@author: nyga
+"""
+import os
+
+from pracmln import MLN, Database
+from pracmln import query, learn
+from pracmln.mlnlearn import EVIDENCE_PREDS
+import time
+
+from pracmln.utils import locs
+
+
+def test_inference_smokers():
+    p = os.path.join(locs.examples, 'smokers', 'smokers.pracmln')
+    mln = MLN(mlnfile=('%s:wts.pybpll.smoking-train-smoking.mln' % p),
+              grammar='StandardGrammar')
+    db = Database(mln, dbfile='%s:smoking-test-smaller.db' % p)
+    for method in ('EnumerationAsk',
+                   'MC-SAT',
+                   'WCSPInference',
+                   'GibbsSampler'):
+        for multicore in (False, True):
+            print('=== INFERENCE TEST:', method, '===')
+            query(queries='Cancer,Smokes,Friends',
+                  method=method,
+                  mln=mln,
+                  db=db,
+                  verbose=True,
+                  multicore=multicore).run()
+
+
+def test_inference_taxonomies():
+    p = os.path.join(locs.examples, 'taxonomies', 'taxonomies.pracmln')
+    mln = MLN(mlnfile=('%s:wts.learned.taxonomy.mln' % p),
+              grammar='PRACGrammar',
+              logic='FuzzyLogic')
+    db = Database(mln, dbfile='%s:evidence.db' % p)
+    for method in ('EnumerationAsk', 'WCSPInference'):
+        print('=== INFERENCE TEST:', method, '===')
+        query(queries='has_sense, action_role',
+              method=method,
+              mln=mln,
+              db=db,
+              verbose=False,
+              cw=True).run().write()
+
+
+def test_learning_smokers():
+    p = os.path.join(locs.examples, 'smokers', 'smokers.pracmln')
+    # mlnpath, 文件路径:模型名.mln,
+    mln = MLN(mlnfile=('%s:smoking.mln' % p), grammar='StandardGrammar')
+    # mln.write() # print predicates & formulas
+    db = Database(mln, dbfile='%s:smoking-train.db' % p)
+    for method in ('BPLL', 'BPLL_CG', 'CLL'):
+        # for multicore in (True, False):
+        print('=== LEARNING TEST:', method, '===')
+        multicore = False
+        learn(method=method,
+              mln=mln,
+              db=db,
+              verbose=True,
+              multicore=multicore).run()
+        break
+
+
+def test_learning_taxonomies():
+    p = os.path.join(locs.examples, 'taxonomies', 'taxonomies.pracmln')
+    mln = MLN(mlnfile=('%s:senses_and_roles.mln' % p), grammar='PRACGrammar')
+    mln.write()
+    dbs = Database.load(mln, dbfiles='%s:training.db' % p)
+    for method in ('DPLL', 'DBPLL_CG', 'DCLL'):
+        # for multicore in (True, False):
+        print('=== LEARNING TEST:', method, '===')
+        multicore = True
+        learn(method=method,
+              mln=mln,
+              db=dbs,
+              verbose=True,
+              multicore=multicore,
+              epreds='is_a',
+              discr_preds=EVIDENCE_PREDS).run()
+
+
+def test_learning_icews14():
+    p = os.path.join(locs.examples, 'icews14', 'icews14.pracmln')
+    mln = MLN(mlnfile=('%s:icews14.mln' % p), grammar='StandardGrammar')
+    # mln.write()
+    db = Database.load(mln, dbfiles='%s:icews14_train.db' % p)
+    # for method in ('BPLL', 'BPLL_CG', 'CLL'):
+    method = 'BPLL'
+    print('=== LEARNING TEST:', method, '===')
+    multicore = True
+    learn(method=method,
+          mln=mln,
+          db=db,
+          verbose=True,
+          multicore=multicore).run()
+
+
+
+def runall():
+    start = time.time()
+    # test_inference_smokers()
+    # test_inference_taxonomies()
+    # test_learning_smokers()
+    # test_learning_taxonomies()
+
+    test_learning_icews14()
+    print()
+    print('all test finished after', time.time() - start, 'secs')
+
+def main():
+    runall()
+
+if __name__ == '__main__':
+    main()