@muellerzr
Last active April 19, 2023 04:06
LR Suggestors.ipynb

@muellerzr (Author) commented May 4, 2021

All the relevant code as a .py can be copied from here:

from fastai.vision.all import *

#export
def slide(lrs:list, losses:list, num_it:int, lr_diff:int=15, thresh:float=.005, adjust_value:float=1) -> (float, tuple):
    "Suggests a learning rate following an interval slide rule and returns the (lr, loss) point to graph"
    losses = to_np(losses)
    loss_grad = np.gradient(losses)

    # Slide an interval `lr_diff` indices wide leftwards from the end of the
    # curve until the gradient change across it falls below `thresh`
    r_idx = -1
    l_idx = r_idx - lr_diff
    local_min_lr = lrs[l_idx]
    while (l_idx >= -len(losses)) and (abs(loss_grad[r_idx] - loss_grad[l_idx]) > thresh):
        local_min_lr = lrs[l_idx]
        r_idx -= 1
        l_idx -= 1
    
    suggestion = local_min_lr * adjust_value
    idx = np.interp(np.log10(suggestion), np.log10(lrs), losses)
    return suggestion, (suggestion, idx)
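# A hypothetical sanity check (values made up, not from a real run):
#   lrs    = tensor(np.logspace(-5, -1, 100))
#   losses = tensor(np.linspace(1., .2, 100))
#   slide(lrs, losses, 100)  # -> (suggested lr, (suggested lr, interpolated loss))
# With a constant slope the interval never needs to slide, so the suggestion
# is simply lrs[-1 - lr_diff] scaled by adjust_value.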

#export
def valley(lrs:list, losses:list, num_it:int) -> (float, tuple):
    "Suggests a learning rate from the longest valley and returns the (lr, loss) point to graph"
    n = len(losses)

    max_start, max_end = 0,0

    # find the longest valley: lds[i] is the length of the longest
    # strictly-decreasing subsequence of losses ending at index i
    lds = [1]*n

    for i in range(1,n):
        for j in range(0,i):
            if (losses[i] < losses[j]) and (lds[i] < lds[j] + 1):
                lds[i] = lds[j] + 1
            if lds[max_end] < lds[i]:
                max_end = i
                max_start = max_end - lds[max_end]
    
    sections = (max_end - max_start) / 3
    idx = max_start + int(sections) + int(sections/2)

    return lrs[idx], (lrs[idx], losses[idx])
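# Worked example (hypothetical indices): for a valley spanning indices 10..40,
# sections = 10, so idx = 10 + 10 + 5 = 25 -- i.e. the suggestion is taken
# roughly halfway down the longest descending stretch of the curve.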

#export
def minimum(lrs:list, losses:list, num_it:int) -> (float, tuple):
    "Suggests a learning rate one-tenth the minimum before divergence and returns the (lr, loss) point to graph"
    lr_min = lrs[losses.argmin()].item()
    loss_idx = losses[min(range(len(lrs)), key=lambda i: abs(lrs[i]-lr_min))]
    return lr_min/10, (lr_min, loss_idx)

#export
def steep(lrs:list, losses:list, num_it:int) -> (float, tuple):
    "Suggests a learning rate at the point where the slope is steepest and returns the (lr, loss) point to graph"
    grads = (losses[1:]-losses[:-1]) / (lrs[1:].log()-lrs[:-1].log())
    lr_steep = lrs[grads.argmin()].item()
    loss_idx = losses[min(range(len(lrs)), key=lambda i: abs(lrs[i]-lr_steep))]
    return lr_steep, (lr_steep, loss_idx)

#export
@patch
def lr_find(self:Learner, start_lr=1e-7, end_lr=10, num_it=100, stop_div=True, show_plot=True, suggest_funcs=(minimum, steep), **kwargs) -> collections.namedtuple:
    "Launch a mock training to find a good learning rate and return suggestions based on `suggest_funcs` as a named tuple"
    n_epoch = num_it//len(self.dls.train) + 1
    cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
    with self.no_logging(): self.fit(n_epoch, cbs=cb)
    if suggest_funcs is not None:
        lrs, losses = tensor(self.recorder.lrs[num_it//10:-5]), tensor(self.recorder.losses[num_it//10:-5])
        _suggestions, nms = [], []
        for func in suggest_funcs:
            nms.append(func.__name__ if not isinstance(func, partial) else func.func.__name__)
            _suggestions.append(func(lrs, losses, num_it))
        
        SuggestedLRs = collections.namedtuple('SuggestedLRs', nms)
        lrs, pnts = [], []
        for lr, pnt in _suggestions:
            lrs.append(lr)
            pnts.append(pnt)
        if show_plot: self.recorder.plot_lr_find(suggestions=pnts, nms=nms)
        return SuggestedLRs(*lrs)

    elif show_plot: self.recorder.plot_lr_find()
    
#export
@patch
def plot_lr_find(self:Recorder, skip_end=5, return_fig=True, suggestions=None, nms=None, **kwargs):
    "Plot the result of an LR Finder test (won't work if you didn't do `learn.lr_find()` before)"
    lrs    = self.lrs    if skip_end==0 else self.lrs   [:-skip_end]
    losses = self.losses if skip_end==0 else self.losses[:-skip_end]
    fig, ax = plt.subplots(1,1)
    ax.plot(lrs, losses)
    ax.set_ylabel("Loss")
    ax.set_xlabel("Learning Rate")
    ax.set_xscale('log')
    if suggestions:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color'][1:]
        for (val, idx), nm, color in zip(suggestions, nms, colors):
            ax.plot(val, idx, 'ro', label=nm, c=color)
        ax.legend(loc='best')

@benihime91

Hi! First of all, nice work!

A small bug in the .py file: shouldn't this line

lrs, losses = tensor(learn.recorder.lrs[num_it//10:-5]), tensor(learn.recorder.losses[num_it//10:-5])

be

lrs, losses = tensor(self.recorder.lrs[num_it//10:-5]), tensor(self.recorder.losses[num_it//10:-5])

@muellerzr (Author)

Yes, it should be! Thank you!

@faroit commented Dec 5, 2022

@muellerzr Not sure if there is a bug, but slide doesn't match my expectation given the description (note that I can't sweep to a higher LR, as I get NaNs then).

[attached image: lr_find plot]
