diff --git a/src/baital.py b/src/baital.py index 075a7b4..f62041b 100644 --- a/src/baital.py +++ b/src/baital.py @@ -29,12 +29,14 @@ import numpy as np import time import textwrap +import utils +import random -def runPreprocess(inputfile, twise, outputdir, benchmarkName, strategy, isApprox, epsilon, delta): +def runPreprocess(inputfile, twise, outputdir, benchmarkName, strategy, preprocessonly, isApprox, epsilon, delta): # if strategy == 3: # cnf_combinations.run(inputfile, twise, 2, outputdir, epsilon, delta) # combinationsFile = os.path.join(outputdir, benchmarkName + '.count') - if (strategy == 1 or strategy == 3): + if (strategy == 1 or strategy == 3 or preprocessonly): if isApprox: cnf_combinations.run(inputfile, twise, 3, outputdir, epsilon, delta) combinationsFile = os.path.join(outputdir, benchmarkName + '_' + str(twise) + '.acomb') @@ -72,7 +74,7 @@ def run(dimacscnf, strategy, twise, samples, descoverage, spr, rounds, outputdir ptime = -1 else: pstart = time.time() - combinationsFile = runPreprocess(dimacscnf, twise, outputdir, benchmarkName, strategy, papprx, pepsilon, pdelta) + combinationsFile = runPreprocess(dimacscnf, twise, outputdir, benchmarkName, strategy, preprocessonly, papprx, pepsilon, pdelta) ptime = time.time() - pstart if combinationsFile != '' else -1 if preprocessonly: @@ -152,7 +154,7 @@ def check_rescombfile(combinations_file, twise): return len(line.split(",")) == twise epilog=textwrap.dedent('''\ -The tool performs 3 steps: +The tool executes 3 steps: Step 1: preprocessing. Used for strategies 1, and 3. Has 2 options that are controlled by --strategy and --preprocess-approximate options (i) Lists the combinations of size allowed by cnf constraints (Could take hours for twise=2, infeasible for large models for twise=3). Results could be reused at step 3. Precomputed file can be provided with --preprocess-file option, expected to have .comb extension. @@ -199,7 +201,7 @@ def main(): parser.add_argument("--weight-function", type=int, default=2, choices=range(1, 8), help="Function number between 1 and 7 for weight generation, used in strategies 1, 2, 3, and 4", dest='funcNumber') parser.add_argument("--no-sampling", action='store_true', help="skip step 2, computes only the coverage of a provided .sample file with --samples-file", dest='nosampling') - parser.add_argument("--samples-file", type=str, default='', help="file with samples to compute the coverage for --no-sampling option. Shall have .samples extension", dest="outersamplefile") + parser.add_argument("--samples-file", type=str, default='', help="file with samples to compute the coverage for --no-sampling option. Shall have .samples extension", dest="externalsamplefile") parser.add_argument("--maxcov-file", type=str, default='', help="file with pregenerated list of satisfiable feature combinations for the step 3. Shall have .comb extension", dest="rescombfile") parser.add_argument("--no-maxcov", action='store_true', help="Compute only number of combinatons in samples, instead of coverage", dest='nocomb') @@ -235,10 +237,10 @@ def main(): print("Delta and epsilon shall be in range (0,1)") sys.exit(1) - if args.nosampling and not args.outersamplefile: - print("--args.nosampling requires file with sample provided with --samples-file") + if args.nosampling and not args.externalsamplefile: + print("--args.nosampling must be used with --samples-file") sys.exit(1) - if args.outersamplefile[-8:] != '.samples': + if args.externalsamplefile[-8:] != '.samples': print("--samples-file is expected to have '.samples' extension") sys.exit(1) #TODO verify --samples-file format @@ -254,7 +256,7 @@ def main(): np.random.seed(args.seed) random.seed(args.seed) - run(args.DIMACSCNF, args.strategy, args.twise, args.samples, args.descoverage, args.spr, args.rounds, args.outputdir, args.outputfile, args.nocomb, args.combfile, args.papprx, args.pdelta, args.pepsilon, args.funcNumber, args.rescombfile, args.nosampling, args.outersamplefile, args.apprx, args.epsilon, args.delta, args.preprocess) + run(args.DIMACSCNF, args.strategy, args.twise, args.samples, args.descoverage, args.spr, args.rounds, args.outputdir, args.outputfile, args.nocomb, args.combfile, args.papprx, args.pdelta, args.pepsilon, args.funcNumber, args.rescombfile, args.nosampling, args.externalsamplefile, args.apprx, args.epsilon, args.delta, args.preprocess) if __name__== "__main__": main() diff --git a/src/cnf_combinations.py b/src/cnf_combinations.py index 599e2d4..c99a4fb 100644 --- a/src/cnf_combinations.py +++ b/src/cnf_combinations.py @@ -30,7 +30,7 @@ from pathlib import Path import utils - +TMPCNFSUFFIX = '_tmp.cnf' ###combs is a list of precomputed combinations of smaller sizes: combs[x] contains combinatios of size x+1 def get_combinations(nVars, clauses, size, outputfile, combs): @@ -199,25 +199,28 @@ def rmfile(filename): def runApproxmc(tmpCNF, epsilon, delta): approxOutput='out.pmc' - rmfile(approxOutput) - - cmd = 'approxmc --seed ' + str(random.randint(1,10000)) + ' --epsilon ' + str(epsilon) + ' --delta ' + str(delta) + ' ' + tmpCNF + ' >' + approxOutput - start = time.time() - os.system(cmd) - total_user_time = time.time() - start - result = -1 - with open(approxOutput) as f: - for line in f: - if line.startswith('s mc'): #Note the version of ApproxMC - number= int(line.strip().split(' ')[2].strip()) - result = number - break - return result - + try: + cmd = 'approxmc --seed ' + str(random.randint(1,10000)) + ' --epsilon ' + str(epsilon) + ' --delta ' + str(delta) + ' ' + tmpCNF + ' >' + approxOutput + start = time.time() + os.system(cmd) + total_user_time = time.time() - start + result = -1 + with open(approxOutput) as f: + for line in f: + if line.startswith('s mc'): #Note the version of ApproxMC + number= int(line.strip().split(' ')[2].strip()) + result = number + break + os.remove(approxOutput) + return result + except: + print(f"Approxmc call failed.") + sys.exit(1) def approxComb(nVars, clauses, twise, tmpCNF, epsilon, delta): generateGFCNF(nVars, clauses, twise, tmpCNF) result = runApproxmc(tmpCNF, epsilon, delta) + os.remove(tmpCNF) return result def approxCombLiteral(nVars, clauses, twise, outputfile, tmpCNF, epsilon, delta): @@ -237,6 +240,7 @@ def approxCombLiteral(nVars, clauses, twise, outputfile, tmpCNF, epsilon, delta) f.write(str(twise)+ '\n') for key in res.keys(): f.write(str(key) + ' ' + str(res[key]) + '\n') + os.remove(tmpCNF) return res #----------------------------------------------------------------------------------------------------- @@ -246,7 +250,7 @@ def run(dimacscnf, twise, mode, outputdir, epsilon, delta): benchmarkName = os.path.basename(dimacscnf).split('.')[0] if mode == 2: start_full = time.time() - res = approxComb(nVars, clauses, twise, benchmarkName + '_tmp.cnf', epsilon, delta) + res = approxComb(nVars, clauses, twise, benchmarkName + TMPCNFSUFFIX, epsilon, delta) print("Approximate number of combinations is " + str(res)) print("Total time: " + str(time.time() - start_full)) # elif mode == 4: #not used anymore @@ -255,7 +259,7 @@ def run(dimacscnf, twise, mode, outputdir, epsilon, delta): # print("Total time: " + str(time.time() - start_full)) elif mode == 3: start_full = time.time() - res = approxCombLiteral(nVars, clauses, twise, os.path.join(outputdir, benchmarkName + '_' + str(twise) + '.acomb'), benchmarkName + '_tmp.cnf', epsilon, delta) + res = approxCombLiteral(nVars, clauses, twise, os.path.join(outputdir, benchmarkName + '_' + str(twise) + '.acomb'), benchmarkName + TMPCNFSUFFIX, epsilon, delta) print("Total time: " + str(time.time() - start_full)) elif mode == 1: combs = [] @@ -278,9 +282,9 @@ def run(dimacscnf, twise, mode, outputdir, epsilon, delta): parser.add_argument('DIMACSCNF', nargs='?', type=str, default="", help='input cnf file in dimacs format') mode = parser.add_mutually_exclusive_group() mode.add_argument('--combinations', action='store_true', help="counts number of combinations, this is the default mode", dest='m1') - mode.add_argument('--model-count', action='store_true', help="counts number of models for each literal", dest='m2') - mode.add_argument('--approximate', action='store_true', help="counts approximate number of combinations", dest='m3') - mode.add_argument('--approximate-literal', action='store_true', help="counts approximate number of combinations for each literal", dest='m4') + #mode.add_argument('--model-count', action='store_true', help="counts number of models for each literal", dest='m4') + mode.add_argument('--approximate', action='store_true', help="counts approximate number of combinations", dest='m2') + mode.add_argument('--approximate-literal', action='store_true', help="counts approximate number of combinations for each literal", dest='m3') parser.add_argument("--delta", type=float, default=0.05, help="Delta for approximate counting", dest='delta') parser.add_argument("--epsilon", type=float, default=0.05, help="Epsilon for approximate counting", dest='epsilon') parser.add_argument("--seed", type=int, help="Random seed. Unused if approximate is not selected", dest='seed') @@ -297,8 +301,8 @@ def run(dimacscnf, twise, mode, outputdir, epsilon, delta): mode = 2 elif args.m3: mode =3 - elif args.m4: - mode =4 + #elif args.m4: + # mode =4 Path(args.outputdir).mkdir(parents=True, exist_ok=True) run(args.DIMACSCNF, args.size, mode, args.outputdir, args.epsilon, args.delta) diff --git a/src/sampling.py b/src/sampling.py index b2fef8d..cdec7f9 100644 --- a/src/sampling.py +++ b/src/sampling.py @@ -87,7 +87,7 @@ def loadApproxCount(combinationsFile): with open(combinationsFile) as f: lines = f.readlines() try: - for line in lines: + for line in lines[1:]: spl = line.strip().split(' ') res.update({int(spl[0]): int(spl[1])}) return res @@ -117,7 +117,7 @@ def loadMaxComb(nvars, twise, combinationsFile): res[val] +=1 return res except: - print(f"Wrong format of the file {combinationsFile}. Each line shall contain a comma separated combination of size {twise}") + print(f"Exception during the processing of {combinationsFile}. Each line shall contain a comma separated combination of size {twise}. Is {combinationsFile} corresponds to the input file?") sys.exit(1) # Attempts to get the total number of combinations or models from .count files. @@ -207,30 +207,32 @@ def run(nSamples, rounds, dimacscnf, outputFile, twise, strategy, descoverage=No for i in range(len(newSamplesLines)): if nSamples <0 or roundN != rounds-1 or roundN * samplesperround + i < nSamples: #ignore extra samples in case of non divisible by number of rounds lineParts = newSamplesLines[i].strip().split(',') - s = list(map(int, lineParts[1].strip().split(' '))) - if strategy == 3 or strategy == 4: - utils.updateBoxesCoverage(varsBoxes, twise, s) - elif strategy == 5: - for val in s: - count[val] +=1 - elif strategy == 1 or strategy == 2: - utils.getTuples_rec(s, twise, trie, count, [], True) - else: - print('Unknown strategy') - sys.exit(1) - # Copy samples to output file + if roundN != rounds-1: + s = list(map(int, lineParts[1].strip().split(' '))) + if strategy == 3 or strategy == 4: + utils.updateBoxesCoverage(varsBoxes, twise, s) + elif strategy == 5: + for val in s: + count[val] +=1 + elif strategy == 1 or strategy == 2: + utils.getTuples_rec(s, twise, trie, count, [], True) + else: + print('Unknown strategy') + sys.exit(1) + # Copy samples to output file sampleNumber = roundN * samplesperround + int(lineParts[0].strip()) output.write(str(sampleNumber) + ',' + lineParts[1] + '\n') os.remove(TMPSAMPLEFILE) # Update counter for strategies 3 and 4 - done once, not after each new sample - if strategy == 3 or strategy == 4: - count = boxesCoveragePerLiteral(varsBoxes, nBoxesLit, nvars, twise) - print("Current approximation of sampled combinations " + str(sum(count.values()) / twise) ) - - # Generate weights for next round - weightFunctions.generateWeights(count, nvars, strategy, maxComb, WEIGHTFILEPREF, roundN+2, funcNumber) + if roundN != rounds-1: + if strategy == 3 or strategy == 4: + count = boxesCoveragePerLiteral(varsBoxes, nBoxesLit, nvars, twise) + print("Current approximation of sampled combinations " + str(sum(count.values()) / twise) ) + + # Generate weights for next round + weightFunctions.generateWeights(count, nvars, strategy, maxComb, WEIGHTFILEPREF, roundN+2, funcNumber) print("The time taken by round " + str(roundN+1) + " :" + str(time.time()-round_start)) print("Round " + str(roundN+1) + ' finished...') @@ -248,12 +250,14 @@ def run(nSamples, rounds, dimacscnf, outputFile, twise, strategy, descoverage=No output.close() print("The time taken by sampling:", time.time()-start) - #cleanup os.remove(PICKLEFILE) - for roundN in range(rounds+1): - os.remove(WEIGHTFILEPREF + str(roundN+1) + '.txt') + for rn in range(roundN+1): + os.remove(WEIGHTFILEPREF + str(rn+1) + '.txt') + if descoverage and roundN < rounds-1: + os.remove(WEIGHTFILEPREF + str(roundN+2) + '.txt') + epilog=textwrap.dedent('''\ Strategies information: strategies define what parameter is used to select weights for the next round. The parameter is computed for each literal. The following lists the parameter for each strategy.