# optimizations.py

from myutils import get_rec_pid, get_path_type
from metrics import *

def sort_by_LIR(path_full):
    """Return the LIR score of the path stored at index 2 of ``path_full``.

    Judging by its name this is meant to be used as a ``key=`` function
    when sorting path records.

    NOTE(review): every other call in this module invokes
    ``LIR_single(path_data, path)`` with two arguments, whereas this helper
    passes the path only -- confirm it still matches the current
    ``LIR_single`` signature.
    """
    path = path_full[2]
    return LIR_single(path)

def sort_by_SEP(path_full):
    """Return the SEP score of the path stored at index 2 of ``path_full``.

    Judging by its name this is meant to be used as a ``key=`` function
    when sorting path records.

    NOTE(review): every other call in this module invokes
    ``SEP_single(path_data, path)`` with two arguments, whereas this helper
    passes the path only -- confirm it still matches the current
    ``SEP_single`` signature.
    """
    path = path_full[2]
    return SEP_single(path)

# Soft optimization (LIR): for every user's top-k predicted by the baseline,
# keep the recommended items and replace each item's explanation with the predicted path that has the highest LIR score
def soft_optimization_LIR(path_data):
    """Re-pick each recommended item's explanation by LIR, keeping the top-k.

    For every user in ``path_data.uid_topk`` and every item of that user's
    top-k, the item's predicted path records are sorted in place by
    descending ``LIR_single`` score of their last element (the path), and
    the path of the first record is stored in
    ``path_data.uid_pid_explanation[uid][pid]``.

    The recommended items themselves are not changed. Nothing is returned;
    ``path_data`` is updated in place.
    """
    def lir_of(record):
        # The path is the last element of a record.
        return LIR_single(path_data, record[-1])

    all_paths = path_data.pred_paths
    for uid, recommended in path_data.uid_topk.items():
        # Only the explanation changes; the selected pids stay as they are.
        for pid in recommended:
            ranked = all_paths[uid][pid]
            ranked.sort(key=lir_of, reverse=True)
            path_data.uid_pid_explanation[uid][pid] = ranked[0][-1]

# Soft optimization (SEP): for every user's top-k predicted by the baseline,
# keep the recommended items and replace each item's explanation with the predicted path that has the highest SEP score
def soft_optimization_SEP(path_data):
    """Re-pick each recommended item's explanation by SEP, keeping the top-k.

    For every user in ``path_data.uid_topk`` and every item of that user's
    top-k, the item's predicted path records are sorted in place by
    descending ``SEP_single`` score of their last element (the path), and
    the path of the first record is stored in
    ``path_data.uid_pid_explanation[uid][pid]``.

    The recommended items themselves are not changed. Nothing is returned;
    ``path_data`` is updated in place.
    """
    def sep_of(record):
        # The path is the last element of a record.
        return SEP_single(path_data, record[-1])

    all_paths = path_data.pred_paths
    for uid, recommended in path_data.uid_topk.items():
        # Only the explanation changes; the selected pids stay as they are.
        for pid in recommended:
            ranked = all_paths[uid][pid]
            ranked.sort(key=sep_of, reverse=True)
            path_data.uid_pid_explanation[uid][pid] = ranked[0][-1]


def soft_optimization_ETD(path_data):
    """Re-pick explanations to diversify path types, keeping the top-k.

    For every user the explanation dict is rebuilt from scratch. Items are
    visited in top-k order; for each item its predicted path records are
    sorted in place by descending probability (index 1) and the first path
    whose type (``get_path_type``) has not yet been used for this user
    becomes the explanation. If every path type of the item has already
    been used, the item's most probable path is used instead.

    The previous implementation sorted by ascending probability, which
    preferred the least probable path and was inconsistent with the
    descending probability sorts used elsewhere in this module.

    Nothing is returned; ``path_data`` is updated in place.

    Raises:
        IndexError: if a top-k item has no predicted paths.
    """
    pred_path = path_data.pred_paths
    for uid, topk in path_data.uid_topk.items():
        explanations = {}
        path_data.uid_pid_explanation[uid] = explanations
        ptype_seen = set()
        for pid in topk:
            current_item_pred_paths = pred_path[uid][pid]
            # Most probable path first.
            current_item_pred_paths.sort(key=lambda x: x[1], reverse=True)
            for path in current_item_pred_paths:
                ptype = get_path_type(path[-1])
                if ptype not in ptype_seen:
                    explanations[pid] = path[-1]
                    ptype_seen.add(ptype)
                    break
            else:
                # No unseen path type for this item: fall back to its most
                # probable path.
                explanations[pid] = current_item_pred_paths[0][-1]

#LIR Alpha optimization
def weighted_opt_LIR(path_data, alpha):
    """Re-rank every user's top-k by blending path score with LIR.

    ``path_data.pred_paths`` maps ``uid -> pid -> list of records``; index 0
    of a record is the path score and the last element is the path.

    For each user all records are pooled and ordered by
    ``score * (1 - alpha) + LIR * alpha`` (descending). The first record of
    each distinct recommended item (``get_rec_pid``) is kept, up to 10
    items. The kept records are then ordered by path score and written to
    ``path_data.uid_topk[uid]`` and ``path_data.uid_pid_explanation[uid]``.

    Returns the same ``path_data`` object.
    """
    def blended(record):
        return (record[0] * (1-alpha)) + (LIR_single(path_data, record[-1]) * alpha)

    for uid, paths_by_pid in path_data.pred_paths.items():
        # Pool every predicted record of this user.
        pool = [record for records in paths_by_pid.values() for record in records]
        pool.sort(key=blended, reverse=True)

        # Keep the best record of each distinct item, at most 10 items.
        chosen = []
        taken_pids = set()
        for record in pool:
            item = get_rec_pid(record)
            if item not in taken_pids:
                chosen.append(record)
                taken_pids.add(item)
                if len(chosen) == 10:
                    break

        # The final ranking follows the plain path score.
        chosen.sort(key=lambda record: record[0], reverse=True)
        path_data.uid_topk[uid] = [get_rec_pid(record) for record in chosen]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(record): record[-1] for record in chosen}
    return path_data

#SEP Alpha optimization
def weighted_opt_SEP(path_data, alpha):
    """Re-rank every user's top-k by blending path score with SEP.

    For each user, every item's record list in ``path_data.pred_paths`` is
    first sorted in place by descending probability (index 1). All records
    are then pooled and ordered by ``score * (1 - alpha) + SEP * alpha``
    (descending). The first record of each distinct recommended item
    (``get_rec_pid``) is kept, up to 10 items. The kept records are ordered
    by path score (index 0) and written to ``path_data.uid_topk[uid]`` and
    ``path_data.uid_pid_explanation[uid]``.

    Returns the same ``path_data`` object.
    """
    def blended(record):
        return (record[0] * (1-alpha)) + (SEP_single(path_data, record[-1]) * alpha)

    for uid, paths_by_pid in path_data.pred_paths.items():
        pool = []
        for records in paths_by_pid.values():
            # In-place sort: the stored per-item lists end up ordered by
            # descending probability.
            records.sort(key=lambda record: record[1], reverse=True)
            pool.extend(records)

        pool.sort(key=blended, reverse=True)

        # Keep the best record of each distinct item, at most 10 items.
        chosen = []
        taken_pids = set()
        for record in pool:
            item = get_rec_pid(record)
            if item not in taken_pids:
                chosen.append(record)
                taken_pids.add(item)
                if len(chosen) == 10:
                    break

        # The final ranking follows the plain path score.
        chosen.sort(key=lambda record: record[0], reverse=True)
        path_data.uid_topk[uid] = [get_rec_pid(record) for record in chosen]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(record): record[-1] for record in chosen}
    return path_data
'''
#ETD Alpha optimization
def weighted_opt_ETD(path_data, alpha):
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()

        #Populate candidate list
        for pid, path_list in pid_list.items():
            path_list = pred_path[uid][pid]
            candidates.extend(path_list)

        # Create a bin for every path type and insert in them every path of that type
        bins = {}
        for candidate in candidates:
            candidate_path_type = get_path_type(candidate[-1])
            if get_path_type(candidate[-1]) not in bins:
                bins[candidate_path_type] = []
            bins[candidate_path_type].append(candidate)

        # Sort every path type bin by a mixed score based on explanation time relevance and item relevance
        for bin_type, path_list in bins.items():
            path_list.sort(key=lambda x: x[0],
                           reverse=True)

        #Search the best for path_score among all the top of every bin, if the paths isn't already seen in the best_candidate give him a alpha bonus
        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = ""
            best_score = -1
            for bin_type, path_list in bins.items():
                if len(path_list) == 0: continue
                candidate = path_list[0]
                rec_pid = get_rec_pid(path_list[0][-1])
                while rec_pid in best_candidates_pids:
                    path_list.pop(0)
                    if len(path_list) == 0: break
                    candidate = path_list[0]
                    rec_pid = get_rec_pid(candidate[-1])
                if len(path_list) == 0: continue
                bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
                score = candidate[0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = get_path_type(candidate[-1])
            if best_type == "":
            #    print("Less than 10: {}".format(len(best_candidates)))
               break
            best_candidates.append(bins[best_type][0])
            best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
            ptype_seen.add(best_type)
            bins[best_type].pop(0)
            if len(bins[best_type]) == 0:
                bins.pop(best_type)

        # Rearrange the topk based on the metric
        best_candidates.sort(key=lambda x: x[0],
                             reverse=True)

        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data
'''
def weighted_opt_ETD(path_data, alpha):
    """Re-rank every user's top-k, rewarding explanation path-type diversity.

    Per user:

    1. For every (item, path type) pair keep only the most probable record
       (index 1 of a record is the probability, the last element the path).
    2. Group the kept records into one bin per path type and order each bin
       by descending path score (index 0).
    3. Greedily pick up to 10 records: at each step compare the head of
       every bin, skipping records whose item was already picked, and add
       ``alpha`` to the score of a head whose path type has not been picked
       yet. The best head is taken.
    4. Order the picked records by path score and write them to
       ``path_data.uid_topk[uid]`` and ``path_data.uid_pid_explanation[uid]``.

    Changes with respect to the previous implementation:

    * Items are de-duplicated with ``get_rec_pid(record)``, the same key
      used for ``uid_topk`` and the explanation dict. The old code passed
      ``record[-1]`` for de-duplication only, so the two keys could differ.
      NOTE(review): confirm against ``myutils.get_rec_pid`` that it expects
      the full record.
    * The running best score starts at negative infinity instead of ``-1``,
      so records scoring ``-1`` or lower are no longer silently ignored.

    Returns the same ``path_data`` object.
    """
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        best_candidates = []
        best_candidates_pids = set()

        # One bin per path type, holding for every item only its most
        # probable record of that type (first one wins on ties).
        bins = {}
        for pid, path_list in pid_list.items():
            best_by_type = {}
            for path in path_list:
                path_type = get_path_type(path[-1])
                incumbent = best_by_type.get(path_type)
                if incumbent is None or path[1] > incumbent[1]:
                    best_by_type[path_type] = path
            for path_type, path in best_by_type.items():
                bins.setdefault(path_type, []).append(path)

        # Inside each bin the best path score comes first.
        for path_list in bins.values():
            path_list.sort(key=lambda x: x[0], reverse=True)

        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = None
            best_score = float("-inf")
            for bin_type, path_list in bins.items():
                # Drop heads whose item has already been selected.
                while path_list and get_rec_pid(path_list[0]) in best_candidates_pids:
                    path_list.pop(0)
                if not path_list:
                    continue
                # A path type not used so far earns the diversity bonus.
                bonus = alpha if bin_type not in ptype_seen else 0
                score = path_list[0][0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = bin_type
            if best_type is None:
                # Every bin is exhausted: fewer than 10 items available.
                break
            winner = bins[best_type].pop(0)
            best_candidates.append(winner)
            best_candidates_pids.add(get_rec_pid(winner))
            ptype_seen.add(best_type)
            if not bins[best_type]:
                del bins[best_type]

        # The final ranking follows the plain path score.
        best_candidates.sort(key=lambda x: x[0], reverse=True)

        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data
#LIR+SEP Optimization
def weighted_opt_LIR_SEP(path_data, alpha):
    """Re-rank every user's top-k by blending path score with LIR + SEP.

    Per user, all predicted records are pooled and their path scores
    (index 0) are min-max normalised over that pool. Records are ordered by
    ``normalised_score * (1 - alpha) + (LIR + SEP) * alpha`` (descending),
    the first record of each distinct recommended item (``get_rec_pid``)
    is kept, up to 10 items, and the kept records are ordered by path
    score before being written to ``path_data.uid_topk[uid]`` and
    ``path_data.uid_pid_explanation[uid]``.

    Changes with respect to the previous implementation:

    * The normalised score is computed on the fly. The old code overwrote
      index 0 of the records stored in ``path_data.pred_paths``, silently
      changing the scores seen by any later consumer of ``pred_paths``.
      Min-max scaling preserves order, so the final ranking by raw score
      matches the ranking by normalised score.
    * A user without any predicted record gets an empty top-k and
      explanation dict instead of a ``ValueError`` from ``min()``; this is
      what the other ``weighted_opt_*`` functions produce in that case.

    Returns the same ``path_data`` object.
    """
    pred_path = path_data.pred_paths

    for uid, pid_list in pred_path.items():
        # Create candidate list
        candidates = [candidate for path_list in pid_list.values() for candidate in path_list]
        if not candidates:
            path_data.uid_topk[uid] = []
            path_data.uid_pid_explanation[uid] = {}
            continue

        # Min-max normalisation parameters, computed once per user.
        scores_list = [candidate[0] for candidate in candidates]
        min_score = min(scores_list)
        score_range = max(scores_list) - min_score
        # Avoid dividing by zero when every score is identical.
        scale = score_range if score_range > 0. else 0.000001

        def blended(candidate, min_score=min_score, scale=scale):
            normalised = (candidate[0] - min_score) / scale
            quality = LIR_single(path_data, candidate[-1]) + SEP_single(path_data, candidate[-1])
            return (normalised * (1 - alpha)) + (quality * alpha)

        candidates.sort(key=blended, reverse=True)

        # Pick the best record of each distinct item, at most 10 items.
        best_candidates = []
        best_candidates_pids = set()
        for candidate in candidates:
            rec_pid = get_rec_pid(candidate)
            if rec_pid in best_candidates_pids:
                continue
            best_candidates.append(candidate)
            best_candidates_pids.add(rec_pid)
            if len(best_candidates) == 10:
                break

        # Reorder topk by path_score
        best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)
        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data

def weighted_opt_ETD_LIR(path_data, alpha):
    """Re-rank every user's top-k for path-type diversity (ETD) and LIR.

    Per user:

    1. Group all predicted records into one bin per path type
       (``get_path_type`` of the record's last element, the path).
    2. Order each bin by ``score * (1 - alpha) + LIR * alpha`` (descending),
       where ``score`` is index 0 of the record.
    3. Greedily pick up to 10 records: at each step compare the head of
       every bin, skipping records whose item was already picked, using
       the head's path score plus an ``alpha`` bonus when its path type
       has not been picked yet. The best head is taken.
    4. Order the picked records by path score and write them to
       ``path_data.uid_topk[uid]`` and ``path_data.uid_pid_explanation[uid]``.

    Changes with respect to the previous implementation:

    * Items are de-duplicated with ``get_rec_pid(record)``, the same key
      used for ``uid_topk`` and the explanation dict. The old code passed
      ``record[-1]`` for de-duplication only, so the two keys could differ.
      NOTE(review): confirm against ``myutils.get_rec_pid`` that it expects
      the full record.
    * The running best score starts at negative infinity instead of ``-1``,
      so records scoring ``-1`` or lower are no longer silently ignored.

    Returns the same ``path_data`` object.
    """
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        best_candidates = []
        best_candidates_pids = set()

        # Create a bin for every path type and insert in it every record
        # of that type.
        bins = {}
        for path_list in pid_list.values():
            for candidate in path_list:
                bins.setdefault(get_path_type(candidate[-1]), []).append(candidate)

        # Inside each bin, order by the score / LIR blend.
        for path_list in bins.values():
            path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (LIR_single(path_data, x[-1]) * alpha),
                           reverse=True)

        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = None
            best_score = float("-inf")
            for bin_type, path_list in bins.items():
                # Drop heads whose item has already been selected.
                while path_list and get_rec_pid(path_list[0]) in best_candidates_pids:
                    path_list.pop(0)
                if not path_list:
                    continue
                # A path type not used so far earns the diversity bonus.
                bonus = alpha if bin_type not in ptype_seen else 0
                score = path_list[0][0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = bin_type
            if best_type is None:
                # Every bin is exhausted: fewer than 10 items available.
                break
            winner = bins[best_type].pop(0)
            best_candidates.append(winner)
            best_candidates_pids.add(get_rec_pid(winner))
            ptype_seen.add(best_type)
            if not bins[best_type]:
                del bins[best_type]

        # Rearrange the topk based on path_score
        best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)

        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data

#ETD+SEP Alpha optimization
def weighted_opt_ETD_SEP(path_data, alpha):
    """Re-rank every user's top-k for path-type diversity (ETD) and SEP.

    Per user:

    1. Group all predicted records into one bin per path type
       (``get_path_type`` of the record's last element, the path).
    2. Order each bin by ``score * (1 - alpha) + SEP * alpha`` (descending),
       where ``score`` is index 0 of the record.
    3. Greedily pick up to 10 records: at each step compare the head of
       every bin, skipping records whose item was already picked, using
       the head's path score plus an ``alpha`` bonus when its path type
       has not been picked yet. The best head is taken.
    4. Order the picked records by path score and write them to
       ``path_data.uid_topk[uid]`` and ``path_data.uid_pid_explanation[uid]``.

    Changes with respect to the previous implementation:

    * Items are de-duplicated with ``get_rec_pid(record)``, the same key
      used for ``uid_topk`` and the explanation dict. The old code passed
      ``record[-1]`` for de-duplication only, so the two keys could differ.
      NOTE(review): confirm against ``myutils.get_rec_pid`` that it expects
      the full record.
    * The running best score starts at negative infinity instead of ``-1``,
      so records scoring ``-1`` or lower are no longer silently ignored.

    Returns the same ``path_data`` object.
    """
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        best_candidates = []
        best_candidates_pids = set()

        # Create a bin for every path type and insert in it every record
        # of that type.
        bins = {}
        for path_list in pid_list.values():
            for candidate in path_list:
                bins.setdefault(get_path_type(candidate[-1]), []).append(candidate)

        # Inside each bin, order by the score / SEP blend.
        for path_list in bins.values():
            path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (SEP_single(path_data, x[-1]) * alpha),
                           reverse=True)

        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = None
            best_score = float("-inf")
            for bin_type, path_list in bins.items():
                # Drop heads whose item has already been selected.
                while path_list and get_rec_pid(path_list[0]) in best_candidates_pids:
                    path_list.pop(0)
                if not path_list:
                    continue
                # A path type not used so far earns the diversity bonus.
                bonus = alpha if bin_type not in ptype_seen else 0
                score = path_list[0][0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = bin_type
            if best_type is None:
                # Every bin is exhausted: fewer than 10 items available.
                break
            winner = bins[best_type].pop(0)
            best_candidates.append(winner)
            best_candidates_pids.add(get_rec_pid(winner))
            ptype_seen.add(best_type)
            if not bins[best_type]:
                del bins[best_type]

        # Rearrange the topk based on path_score
        best_candidates.sort(key=lambda x: x[0], reverse=True)

        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data

#ETD+SEP+LIR Alpha optimization
def weighted_opt_ETD_SEP_LIR(path_data, alpha):
    """Re-rank every user's top-k for path-type diversity (ETD), SEP and LIR.

    Per user:

    1. Group all predicted records into one bin per path type
       (``get_path_type`` of the record's last element, the path).
    2. Order each bin by ``score * (1 - alpha) + (LIR + SEP) * alpha``
       (descending), where ``score`` is index 0 of the record.
    3. Greedily pick up to 10 records: at each step compare the head of
       every bin, skipping records whose item was already picked, using
       the head's path score plus an ``alpha`` bonus when its path type
       has not been picked yet. The best head is taken.
    4. Order the picked records by path score and write them to
       ``path_data.uid_topk[uid]`` and ``path_data.uid_pid_explanation[uid]``.

    Changes with respect to the previous implementation:

    * Items are de-duplicated with ``get_rec_pid(record)``, the same key
      used for ``uid_topk`` and the explanation dict. The old code passed
      ``record[-1]`` for de-duplication only, so the two keys could differ.
      NOTE(review): confirm against ``myutils.get_rec_pid`` that it expects
      the full record.
    * The running best score starts at negative infinity instead of ``-1``,
      so records scoring ``-1`` or lower are no longer silently ignored.

    Returns the same ``path_data`` object.
    """
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        best_candidates = []
        best_candidates_pids = set()

        # Create a bin for every path type and insert in it every record
        # of that type.
        bins = {}
        for path_list in pid_list.values():
            for candidate in path_list:
                bins.setdefault(get_path_type(candidate[-1]), []).append(candidate)

        # Inside each bin, order by the score / (LIR + SEP) blend.
        for path_list in bins.values():
            path_list.sort(
                key=lambda x: (x[0] * (1 - alpha)) + ((LIR_single(path_data, x[-1]) + SEP_single(path_data, x[-1])) * alpha),
                reverse=True)

        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = None
            best_score = float("-inf")
            for bin_type, path_list in bins.items():
                # Drop heads whose item has already been selected.
                while path_list and get_rec_pid(path_list[0]) in best_candidates_pids:
                    path_list.pop(0)
                if not path_list:
                    continue
                # A path type not used so far earns the diversity bonus.
                bonus = alpha if bin_type not in ptype_seen else 0
                score = path_list[0][0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = bin_type
            if best_type is None:
                # Every bin is exhausted: fewer than 10 items available.
                break
            winner = bins[best_type].pop(0)
            best_candidates.append(winner)
            best_candidates_pids.add(get_rec_pid(winner))
            ptype_seen.add(best_type)
            if not bins[best_type]:
                del bins[best_type]

        # Rearrange the topk based on path_score
        best_candidates.sort(key=lambda x: x[0], reverse=True)

        # Update the topk with the reranked one
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
    return path_data