Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into leaf_samples_dist…
Browse files Browse the repository at this point in the history
…ribution

# Conflicts:
#	notebooks/tree_structure_example.ipynb
  • Loading branch information
tlapusan committed Oct 28, 2019
2 parents d5f6b06 + 945b31b commit da16f97
Show file tree
Hide file tree
Showing 24 changed files with 36,671 additions and 70,952 deletions.
3 changes: 3 additions & 0 deletions dtreeviz/colors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

COLORS = {'scatter_edge': GREY,
'scatter_marker': BLUE,
'scatter_marker_alpha': 0.5,
'tesselation_alpha': 0.3,
'tesselation_alpha_3D': 0.5,
'split_line': GREY,
'mean_line': '#f46d43',
'axis_label': GREY,
Expand Down
69 changes: 35 additions & 34 deletions dtreeviz/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def rtreeviz_univar(ax=None,
show={'title','splits'},
split_linewidth=.5,
mean_linewidth = 2,
markersize=None,
markersize=15,
colors=None):
if isinstance(x_train, pd.Series):
x_train = x_train.values
Expand Down Expand Up @@ -127,7 +127,7 @@ def rtreeviz_univar(ax=None,
inrange = y_train[(x_train >= left) & (x_train <= right)]
means.append(np.mean(inrange))

ax.scatter(x_train, y_train, marker='o', alpha=.4, c=colors['scatter_marker'], s=markersize,
ax.scatter(x_train, y_train, marker='o', alpha=colors['scatter_marker_alpha'], c=colors['scatter_marker'], s=markersize,
edgecolor=colors['scatter_edge'], lw=.3)

if 'splits' in show:
Expand All @@ -146,18 +146,19 @@ def rtreeviz_univar(ax=None,

if 'title' in show:
title = f"Regression tree depth {max_depth}, samples per leaf {min_samples_leaf},\nTraining $R^2$={t.score(x_train.reshape(-1, 1), y_train):.3f}"
plt.title(title, fontsize=fontsize, color=colors['title'])
ax.set_title(title, fontsize=fontsize, color=colors['title'])

plt.xlabel(feature_name, fontsize=fontsize, color=colors['axis_label'])
plt.ylabel(target_name, fontsize=fontsize, color=colors['axis_label'])
ax.set_xlabel(feature_name, fontsize=fontsize, color=colors['axis_label'])
ax.set_ylabel(target_name, fontsize=fontsize, color=colors['axis_label'])


def rtreeviz_bivar_heatmap(ax=None, X_train=None, y_train=None, max_depth=10, feature_names=None,
fontsize=14, ticks_fontsize=12, fontname="Arial",
show={'title'},
n_colors_in_map=100,
colors=None
) -> tree.DecisionTreeClassifier:
colors=None,
markersize = 15
) -> tree.DecisionTreeClassifier:
"""
Show the tessellated 2D feature space for a bivariate regression tree. X_train can
have many features, but features lists the indexes of the 2 features to train the tree with.
Expand Down Expand Up @@ -196,13 +197,13 @@ def rtreeviz_bivar_heatmap(ax=None, X_train=None, y_train=None, max_depth=10, fe
y = bbox[1]
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
rect = patches.Rectangle((x, y), w, h, 0, linewidth=.3, alpha=.5,
rect = patches.Rectangle((x, y), w, h, 0, linewidth=.3, alpha=colors['tesselation_alpha'],
edgecolor=colors['edge'], facecolor=color)
ax.add_patch(rect)

color_map = [color_map[int(((y-y_lim[0])/y_range)*(n_colors_in_map-1))] for y in y_train]
x, y, z = X_train[:,0], X_train[:,1], y_train
ax.scatter(x, y, marker='o', alpha=.95, c=color_map, edgecolor=colors['scatter_edge'], lw=.3)
ax.scatter(x, y, marker='o', c=color_map, edgecolor=colors['scatter_edge'], lw=.3, s=markersize)

ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
ax.set_ylabel(f"{feature_names[1]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
Expand All @@ -212,7 +213,7 @@ def rtreeviz_bivar_heatmap(ax=None, X_train=None, y_train=None, max_depth=10, fe
if 'title' in show:
accur = rt.score(X_train, y_train)
title = f"Regression tree depth {max_depth}, training $R^2$={accur:.3f}"
plt.title(title, fontsize=fontsize, color=colors['title'])
ax.set_title(title, fontsize=fontsize, color=colors['title'])

return None

Expand All @@ -222,6 +223,7 @@ def rtreeviz_bivar_3D(ax=None, X_train=None, y_train=None, max_depth=10, feature
azim=0, elev=0, dist=7,
show={'title'},
colors=None,
markersize=15,
n_colors_in_map = 100
) -> tree.DecisionTreeClassifier:
"""
Expand Down Expand Up @@ -254,7 +256,7 @@ def plane(node, bbox):
z = np.full(xx.shape, node.prediction())
# print(f"{node.prediction()}->{int(((node.prediction()-y_lim[0])/y_range)*(n_colors_in_map-1))}, lim {y_lim}")
# print(f"{color_map[int(((node.prediction()-y_lim[0])/y_range)*(n_colors_in_map-1))]}")
ax.plot_surface(xx, yy, z, alpha=.85, shade=False,
ax.plot_surface(xx, yy, z, alpha=colors['tesselation_alpha_3D'], shade=False,
color=color_map[int(((node.prediction()-y_lim[0])/y_range)*(n_colors_in_map-1))],
edgecolor=colors['edge'], lw=.3)

Expand All @@ -274,7 +276,7 @@ def plane(node, bbox):
plane(node, bbox)

x, y, z = X_train[:, 0], X_train[:, 1], y_train
ax.scatter(x, y, z, marker='o', alpha=.7, edgecolor=colors['scatter_edge'], lw=.3, c=color_map)
ax.scatter(x, y, z, marker='o', alpha=colors['scatter_marker_alpha'], edgecolor=colors['scatter_edge'], lw=.3, c=color_map, s=markersize)

ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
ax.set_ylabel(f"{feature_names[1]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
Expand All @@ -285,7 +287,7 @@ def plane(node, bbox):
if 'title' in show:
accur = rt.score(X_train, y_train)
title = f"Regression tree depth {max_depth}, training $R^2$={accur:.3f}"
plt.title(title, fontsize=fontsize, color=colors['title'])
ax.set_title(title, fontsize=fontsize, color=colors['title'])

return None

Expand Down Expand Up @@ -366,7 +368,7 @@ def ctreeviz_univar(ax=None, x_train=None, y_train=None, feature_name=None, clas
ax.set_ylim(0, mu + n_classes*class_step)
for i, bucket in enumerate(X_hist):
y_noise = np.random.normal(mu+i*class_step, sigma, size=len(bucket))
ax.scatter(bucket, y_noise, alpha=.7, marker='o', s=dot_w, c=color_map[i],
ax.scatter(bucket, y_noise, alpha=colors['scatter_marker_alpha'], marker='o', s=dot_w, c=color_map[i],
edgecolors=colors['scatter_edge'], lw=.3)

ax.tick_params(axis='both', which='major', width=.3, labelcolor=colors['tick_label'],
Expand All @@ -376,30 +378,29 @@ def ctreeviz_univar(ax=None, x_train=None, y_train=None, feature_name=None, clas
splits = sorted(splits)
bins = [ax.get_xlim()[0]] + splits + [ax.get_xlim()[1]]

pred_box_height = .07 * ax.get_ylim()[1]
preds = []
for i in range(len(bins) - 1):
left = bins[i]
right = bins[i + 1]
inrange = y_train[(x_train >= left) & (x_train <= right)]
values, counts = np.unique(inrange, return_counts=True)
pred = values[np.argmax(counts)]
rect = patches.Rectangle((left, 0), (right - left), pred_box_height, linewidth=.3,
edgecolor=colors['edge'], facecolor=color_map[pred])
ax.add_patch(rect)
preds.append(pred)
if 'splits' in show: # this gets the horiz bars showing prediction region
pred_box_height = .07 * ax.get_ylim()[1]
for i in range(len(bins) - 1):
left = bins[i]
right = bins[i + 1]
inrange = y_train[(x_train >= left) & (x_train <= right)]
values, counts = np.unique(inrange, return_counts=True)
pred = values[np.argmax(counts)]
rect = patches.Rectangle((left, 0), (right - left), pred_box_height, linewidth=.3,
edgecolor=colors['edge'], facecolor=color_map[pred])
ax.add_patch(rect)

if 'legend' in show:
add_classifier_legend(ax, class_names, class_values, color_map, target_name, colors)

if 'title' in show:
accur = ct.score(x_train.reshape(-1, 1), y_train)
title = f"Classifier tree depth {max_depth}, training accuracy={accur*100:.2f}%"
plt.title(title, fontsize=fontsize, color=colors['title'])
ax.set_title(title, fontsize=fontsize, color=colors['title'])

if 'splits' in show:
for split in splits:
plt.plot([split, split], [*ax.get_ylim()], '--', color=colors['split_line'], linewidth=1)
ax.plot([split, split], [*ax.get_ylim()], '--', color=colors['split_line'], linewidth=1)


def ctreeviz_bivar(ax=None, X_train=None, y_train=None, feature_names=None, class_names=None,
Expand Down Expand Up @@ -454,14 +455,14 @@ def ctreeviz_bivar(ax=None, X_train=None, y_train=None, feature_names=None, clas
y = bbox[1]
w = bbox[2]-bbox[0]
h = bbox[3]-bbox[1]
rect = patches.Rectangle((x, y), w, h, 0, linewidth=.3, alpha=.4,
rect = patches.Rectangle((x, y), w, h, 0, linewidth=.3, alpha=colors['tesselation_alpha'],
edgecolor=colors['rect_edge'], facecolor=color_map[node.prediction()])
ax.add_patch(rect)

dot_w = 25
X_hist = [X_train[y_train == cl] for cl in class_values]
for i, h in enumerate(X_hist):
ax.scatter(h[:,0], h[:,1], alpha=1, marker='o', s=dot_w, c=color_map[i],
ax.scatter(h[:,0], h[:,1], marker='o', s=dot_w, c=color_map[i],
edgecolors=colors['scatter_edge'], lw=.3)

ax.set_xlabel(f"{feature_names[0]}", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
Expand All @@ -476,7 +477,7 @@ def ctreeviz_bivar(ax=None, X_train=None, y_train=None, feature_names=None, clas
if 'title' in show:
accur = ct.score(X_train, y_train)
title = f"Classifier tree depth {max_depth}, training accuracy={accur*100:.2f}%"
plt.title(title, fontsize=fontsize, color=colors['title'],)
ax.set_title(title, fontsize=fontsize, color=colors['title'],)

return None

Expand Down Expand Up @@ -936,7 +937,7 @@ def class_split_viz(node: ShadowDecTreeNode,
dot_w = 20
ax.set_ylim(0, mu + n_classes * class_step)
for i, bucket in enumerate(X_hist):
alpha = .6 if len(bucket) > 10 else 1
alpha = colors['scatter_marker_alpha'] if len(bucket) > 10 else 1
y_noise = np.random.normal(mu + i * class_step, sigma, size=len(bucket))
ax.scatter(bucket, y_noise, alpha=alpha, marker='o', s=dot_w, c=colors[i],
edgecolors=colors['edge'], lw=.3)
Expand Down Expand Up @@ -1065,7 +1066,7 @@ def regr_split_viz(node: ShadowDecTreeNode,
xticks += [node.split()]
ax.set_xticks(xticks)

ax.scatter(X_feature, y_train, s=5, c=colors['scatter_marker'], alpha=.4, lw=.3)
ax.scatter(X_feature, y_train, s=5, c=colors['scatter_marker'], alpha=colors['scatter_marker_alpha'], lw=.3)
left, right = node.split_samples()
left = y_train[left]
right = y_train[right]
Expand Down Expand Up @@ -1140,7 +1141,7 @@ def regr_leaf_viz(node : ShadowDecTreeNode,
sigma = .08
X = np.random.normal(mu, sigma, size=len(y))
ax.set_xlim(0, 1)
alpha = .25
alpha = colors['scatter_marker_alpha'] # was .25

ax.scatter(X, y, s=5, c=colors['scatter_marker'], alpha=alpha, lw=.3)
ax.plot([0,len(node.samples())],[m,m],'--', color=colors['split_line'], linewidth=1)
Expand Down
130 changes: 65 additions & 65 deletions notebooks/colors.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit da16f97

Please sign in to comment.