Notebook

In [1]:

%matplotlib inline
import ipywidgets as widgets
from ipywidgets import interact_manual
import numpy as np
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import re

# Matplotlib configuration
# Table of (rc group, settings) pairs; each pair is forwarded to matplotlib.rc()
# in the order listed so that every figure drawn later picks up these defaults.
for rc_group, rc_options in [
    ('lines', dict(linewidth=2.0)),
    ('text', dict(hinting_factor=12)),
    ('axes', dict(facecolor='eeeeee', edgecolor='bcbcbc', grid=True,
                  labelsize='large')),
    ('legend', dict(fancybox=True)),
    ('figure', dict(figsize=(15,9))),
]:
    matplotlib.rc(rc_group, **rc_options)

In [2]:

# Path of the text file holding saved `hashcat --benchmark` output.
# The value below is a placeholder: point it at a real file before running the
# parsing cell, which opens this path and scans it line by line for
# 'Hashtype: ...' headers and 'Speed.Dev.#*...' speed lines.
BENCHMARK_FILEPATH = '/path/to/file/with/hashcat/benchmark/output.txt'

In [3]:

# Constants shared by the cells below.

# Character classes a password may be drawn from; only their lengths are used
# when sizing the brute-force keyspace.
LOWER_CASE = 'abcdefghijklmnopqrstuvwxyz'
UPPER_CASE = ''.join(letter.upper() for letter in LOWER_CASE)
NUMBERS = '0123456789'
SYMBOLS = '!@#$%^&*()-_<>?,./:";\\\'+=|`~'

# Hourly price in dollars: 8 GPUs at $0.70 each, plus $0.10 for the VM itself.
COST_PER_HOUR = 8 * 0.70 + 0.10

# Multiplier for each unit prefix that may precede 'H/s' in a benchmark speed
# (no prefix, kilo, mega, giga, tera, peta): successive powers of 1000.
UNIT_CONVERSIONS = {
    prefix: 1000 ** exponent
    for exponent, prefix in enumerate(('', 'k', 'M', 'G', 'T', 'P'))
}

In [4]:

# Read the benchmark results and parse out a dict of hash type to hashes per second.
hashtype_re = re.compile(r'^Hashtype: (.+)$')
speed_re = re.compile(r'^Speed\.Dev\.#\*\.+: (.+H/s)')


def _parse_benchmark_lines(lines):
    """Map each hash type found in ``lines`` to its speed in hashes per second.

    A 'Hashtype: <name>' line opens a section; the first following line that
    matches the aggregate 'Speed.Dev.#*...: <number> <prefix>H/s' pattern
    supplies that hash type's speed and closes the section.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        dict mapping hash type name to speed (float, hashes per second).

    Raises:
        KeyError: if the unit prefix is not a key of UNIT_CONVERSIONS.
        ValueError: if the matched speed text is not '<number> <unit>'.
    """
    results = dict()
    hashtype = ''
    for line in lines:
        header = hashtype_re.search(line)
        if header:
            # A new header always starts a new section. Without this, a hash
            # type that never reports an aggregate speed would stay "open" and
            # steal the speed line belonging to the next hash type.
            hashtype = header.group(1)
            continue
        if hashtype == '':
            # Speed lines outside of any section cannot be attributed.
            continue
        match = speed_re.search(line)
        if match:
            # split() with no separator tolerates any amount of padding
            # around and between the number and the unit.
            (speed, units) = match.group(1).split()
            units = units.replace('H/s', '')
            results[hashtype] = float(speed) * UNIT_CONVERSIONS[units]
            hashtype = ''
    return results


with open(BENCHMARK_FILEPATH) as f:
    benchmark_results = _parse_benchmark_lines(f)

In [5]:

# Helper functions for determining compute time with various password parameters and hash rates.
# Each unit is derived from the next smaller one, so all six share one keyspace calculation.
def seconds_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the seconds needed to hash every password of one exact length.

    Args:
        number_of_symbols: Number of distinct characters each position may hold.
        password_length: Number of characters in the password.
        hashes_per_second: Hash rate used to work through the keyspace.

    Returns:
        float: ``number_of_symbols ** password_length / hashes_per_second``.

    Raises:
        ZeroDivisionError: if ``hashes_per_second`` is zero.
    """
    combinations = number_of_symbols ** password_length
    return float(combinations) / hashes_per_second

def minutes_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the same duration as seconds_to_compute, expressed in minutes."""
    return seconds_to_compute(number_of_symbols, password_length, hashes_per_second) / 60

def hours_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the same duration as seconds_to_compute, expressed in hours."""
    return minutes_to_compute(number_of_symbols, password_length, hashes_per_second) / 60

def days_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the same duration as seconds_to_compute, expressed in days."""
    # 24 hours per day. (A divisor of 34 here would shrink every day, week and
    # year figure by roughly 30%.)
    return hours_to_compute(number_of_symbols, password_length, hashes_per_second) / 24

def weeks_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the same duration as seconds_to_compute, expressed in weeks."""
    return days_to_compute(number_of_symbols, password_length, hashes_per_second) / 7

def years_to_compute(number_of_symbols, password_length, hashes_per_second):
    """Return the same duration as seconds_to_compute, expressed in years.

    A year is taken to be exactly 52 weeks (364 days).
    """
    return weeks_to_compute(number_of_symbols, password_length, hashes_per_second) / 52

# Lookup table from a Y-axis label to the helper that expresses the
# brute-force duration in that unit. Entries are listed smallest unit first.
y_axis_funcs = dict([
    ('Seconds', seconds_to_compute),
    ('Minutes', minutes_to_compute),
    ('Hours', hours_to_compute),
    ('Days', days_to_compute),
    ('Weeks', weeks_to_compute),
    ('Years', years_to_compute),
])

In [6]:

# Generate a plot of password length vs time to compute based on the benchmark results.
def plt_time(algorithms_to_show,
             min_password_length,
             max_password_length,
             number_of_symbols,
             time_units,
             benchmark_results):
    """Scatter-plot brute-force time against password length, one series per algorithm.

    Draws on pyplot figure 1 using a logarithmic Y axis and a legend in the
    upper-left corner.

    Args:
        algorithms_to_show: Sized sequence of keys into ``benchmark_results``.
        min_password_length: Shortest password length plotted (inclusive).
        max_password_length: Longest password length plotted (inclusive).
        number_of_symbols: Number of distinct characters per password position.
        time_units: Key into ``y_axis_funcs``; also used as the Y-axis label.
        benchmark_results: Mapping of algorithm name to hashes per second.
    """
    def with_thousands_separators(tick_value, tick_position):
        # Tick labels are the tick value formatted with ',' grouping, replacing
        # matplotlib's default log-axis labels.
        return format(tick_value, ',')

    plt.figure(1)
    plt.title('Time to Compute Password Hashes by Password Length and Algorithm')
    plt.xlabel('Password Length')
    plt.ylabel(time_units)
    plt.yscale('log')
    plt.gca().get_yaxis().set_major_formatter(FuncFormatter(with_thousands_separators))

    # One evenly spaced rainbow colour per algorithm.
    palette = cm.rainbow(np.linspace(0, 1, len(algorithms_to_show)))
    lengths = range(min_password_length, max_password_length + 1)
    for algorithm, series_color in zip(algorithms_to_show, palette):
        durations = []
        for length in lengths:
            durations.append(
                y_axis_funcs[time_units](number_of_symbols, length, benchmark_results[algorithm]))
        plt.scatter(lengths, durations, color=series_color, s=50, alpha=0.7, label=algorithm)
    plt.legend(loc=2)

In [7]:

# Generate a plot of password length vs cost to compute based on benchmark results and GCE pricing.
def plt_cost(algorithms_to_show,
             min_password_length,
             max_password_length,
             number_of_symbols,
             benchmark_results):
    """Scatter-plot brute-force cost in dollars against password length.

    Cost is the number of compute hours multiplied by ``COST_PER_HOUR``. Draws
    on pyplot figure 2 using a logarithmic Y axis and a legend in the
    upper-left corner.

    Args:
        algorithms_to_show: Sized sequence of keys into ``benchmark_results``.
        min_password_length: Shortest password length plotted (inclusive).
        max_password_length: Longest password length plotted (inclusive).
        number_of_symbols: Number of distinct characters per password position.
        benchmark_results: Mapping of algorithm name to hashes per second.
    """
    plt.figure(2)
    plt.title('Cost to Compute Password Hashes by Password Length and Algorithm')
    plt.xlabel('Password Length')
    plt.ylabel('Cost ($)')
    plt.yscale('log')

    # Whole-dollar tick labels with thousands separators instead of the
    # default log-axis labels.
    whole_dollars = FuncFormatter(lambda tick, _position: '{:,.0f}'.format(tick))
    plt.gca().get_yaxis().set_major_formatter(whole_dollars)

    # Row i of the palette is the colour of the i-th algorithm.
    palette = cm.rainbow(np.linspace(0, 1, len(algorithms_to_show)))
    lengths = range(min_password_length, max_password_length + 1)
    for index, algorithm in enumerate(algorithms_to_show):
        costs = [
            hours_to_compute(number_of_symbols, length, benchmark_results[algorithm]) * COST_PER_HOUR
            for length in lengths
        ]
        plt.scatter(lengths, costs, color=palette[index], s=50, alpha=0.7, label=algorithm)
    plt.legend(loc=2)

In [8]:

# Input controls whose values parameterise the plots.

# Two-handled slider: the lower and upper handle give the shortest and longest
# password length to plot.
password_length = widgets.IntRangeSlider(
    value=[6, 10], min=1, max=20,
    description='Password Length:',
    continuous_update=True,
)

# Every parsed hash type, alphabetically; the first five are preselected.
algorithms = sorted(benchmark_results.keys())
algorithms_to_show = widgets.SelectMultiple(
    options=algorithms,
    value=algorithms[:5],
    description='Algorithms to Show:',
)

# Unit for the Y axis of the time plot.
time_units = widgets.ToggleButtons(
    options=['Seconds', 'Minutes', 'Hours', 'Days', 'Weeks', 'Years'],
    value='Hours',
    description='Time Unit:',
)

# One on/off button per character class, all switched on initially.
use_lower_case, use_upper_case, use_numbers, use_symbols = [
    widgets.ToggleButton(value=True, description=button_label)
    for button_label in ('Lower Case', 'Upper Case', 'Numbers', 'Symbols')
]

In [9]:

# Show the widgets and connect them to a function that will regenerate the plots.
# interact_manual is used (plots are redrawn only on request) because
# generating the plots is too slow to do on every widget change.
@interact_manual(password_length=password_length,
                 algorithms_to_show=algorithms_to_show,
                 time_units=time_units,
                 use_lower_case=use_lower_case,
                 use_upper_case=use_upper_case,
                 use_numbers=use_numbers,
                 use_symbols=use_symbols)
def run_interactive_plots(password_length,
                          algorithms_to_show,
                          time_units,
                          use_lower_case,
                          use_upper_case,
                          use_numbers,
                          use_symbols):
    """Redraw the time and cost plots from the current widget values.

    Args:
        password_length: Pair whose first and second items are the shortest and
            longest password length to plot.
        algorithms_to_show: Algorithm names to draw a series for.
        time_units: Unit label for the time plot's Y axis.
        use_lower_case: Whether LOWER_CASE counts toward the alphabet size.
        use_upper_case: Whether UPPER_CASE counts toward the alphabet size.
        use_numbers: Whether NUMBERS counts toward the alphabet size.
        use_symbols: Whether SYMBOLS counts toward the alphabet size.
    """
    # The alphabet size is the combined size of every enabled character class.
    character_classes = (
        (use_lower_case, LOWER_CASE),
        (use_upper_case, UPPER_CASE),
        (use_numbers, NUMBERS),
        (use_symbols, SYMBOLS),
    )
    number_of_symbols = sum(len(characters)
                            for enabled, characters in character_classes
                            if enabled)

    # Arguments common to both plotting functions.
    shared_arguments = dict(
        algorithms_to_show=algorithms_to_show,
        min_password_length=password_length[0],
        max_password_length=password_length[1],
        number_of_symbols=number_of_symbols,
        benchmark_results=benchmark_results,
    )
    plt_time(time_units=time_units, **shared_arguments)
    plt_cost(**shared_arguments)