Source code for simple_rl.utils.chart_utils

'''
chart_utils.py: Charting utilities for RL experiments.

Functions:
    load_data: Loads data from csv files into lists.
    average_data: Averages data across instances.
    compute_conf_intervals: Confidence interval computation.
    compute_single_conf_interval: Helper function for above.
    _format_title: Replaces underscores/dashes with spaces and capitalizes each word of the plot title.
    plot: Creates (and opens) a single plot using matplotlib.pyplot
    make_plots: Puts everything in order to create the plot.
    _get_agent_names: Grabs the agent names from the experiment parameter file, named @Experiment.EXP_PARAM_FILE_NAME
    _get_agent_colors: Determines the relevant colors/markers for the plot.
    _is_episodic: Determines if the experiment was episodic from the experiment parameter file, named @Experiment.EXP_PARAM_FILE_NAME
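    _is_disc_reward: Determines if the experiment recorded discounted reward from the experiment parameter file, named @Experiment.EXP_PARAM_FILE_NAME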
    parse_args: Parse command line arguments.
    main: Loads data from a given path and creates plot.

Author: David Abel (cs.brown.edu/~dabel)
'''

# Python imports.
from __future__ import print_function
import math
import decimal
import sys
import os
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as pyplot
import numpy as np
import subprocess
import argparse

# RGB triples (0-255 per channel), one per plotted series. plot() divides each
# channel by 255.0 and wraps around this list when there are more agents than colors.
color_ls = [[118, 167, 125], [102, 120, 173],\
            [198, 113, 113], [94, 94, 94],\
            [169, 193, 213], [230, 169, 132],\
            [192, 197, 182], [210, 180, 226]]

# Set font.
font = {'size':14}
matplotlib.rc('font', **font)
# Font type 42 asks the PDF backend for TrueType (Type 42) fonts.
matplotlib.rcParams['pdf.fonttype'] = 42
# matplotlib.rcParams['text.usetex'] = True
# Grabs the current figure at import time (a module-level side effect).
# NOTE(review): `fig` is not referenced by any function visible in this file.
fig = matplotlib.pyplot.gcf()

# Module-level overrides read by plot(). When left as None, plot() derives the
# title and axis labels from its arguments.
CUSTOM_TITLE = None
X_AXIS_LABEL = None
Y_AXIS_LABEL = None
# First x value and spacing between consecutive x values of every series.
X_AXIS_START_VAL = 0
X_AXIS_INCREMENT = 1
# NOTE(review): not read by any function visible in this file -- confirm usage elsewhere.
Y_AXIS_END_VAL = None

def load_data(experiment_dir, experiment_agents):
    '''
    Args:
        experiment_dir (str): Directory containing one "<agent>.csv" results file per agent.
        experiment_agents (list): Agents whose results files will be loaded (each is converted with str()).

    Returns:
        result (list): A 3d matrix containing rewards, where the dimensions are [algorithm][instance][episode].

    Summary:
        Every line of a results file is treated as one instance and every
        comma-separated field as one value. The last field of each line is
        always discarded (it holds whatever follows the final comma, e.g. the
        newline), empty fields are skipped, and lines that end up with no
        values are dropped.
    '''
    result = []
    for alg in experiment_agents:
        reward_path = os.path.join(experiment_dir, str(alg)) + ".csv"
        all_instances = []

        # The context manager closes the file even if a field fails to parse.
        with open(reward_path, "r") as all_reward:
            for instance in all_reward:
                # Put the reward instances into a list of floats.
                all_episodes_for_instance = [float(r) for r in instance.split(",")[:-1] if len(r) > 0]
                if len(all_episodes_for_instance) > 0:
                    all_instances.append(all_episodes_for_instance)

        result.append(all_instances)

    return result
def average_data(data, cumulative=False):
    '''
    Args:
        data (list): a 3D matrix, [algorithm][instance][episode]
        cumulative (bool) *opt: determines if we should compute the average cumulative reward/cost or just regular.

    Returns:
        (list): a 2D matrix, [algorithm][episode], where the instance rewards have been averaged.
            Each row is a numpy array, or a list of running totals if @cumulative=True.

    Raises:
        ValueError: if the instances of one algorithm do not all have the same number of episodes.
    '''
    result = []

    for all_instances in data:
        # Ragged instances cannot be averaged episode-by-episode. Check this
        # explicitly rather than relying on how numpy reacts to ragged input.
        episode_counts = set(len(instance) for instance in all_instances)
        if len(episode_counts) > 1:
            raise ValueError("(simple_rl) Plotting Error: an algorithm was run with inconsistent parameters (likely inconsistent number of Episodes/Instances. Try clearing old data).")

        # Take the average over instances (axis 0), leaving one value per episode.
        num_instances = float(len(all_instances))
        avged = np.array(all_instances).sum(axis=0) / num_instances

        if cumulative:
            # If we're summing over episodes: entry i holds the total up to episode i.
            avged = list(np.cumsum(avged))

        result.append(avged)

    return result
def compute_conf_intervals(data, cumulative=False):
    '''
    Args:
        data (list): A 3D matrix, [algorithm][instance][episode]
        cumulative (bool) *opt: if true, the interval for episode j is computed
            over each instance's running total up to and including episode j.

    Returns:
        (list): a 2D matrix, [algorithm][episode], holding the margin of error
            computed across instances for each episode.
    '''
    intervals_per_alg = []  # [alg][conf_inv_for_episode]

    for alg_instances in data:
        instance_count = len(alg_instances)
        episode_count = len(alg_instances[0])
        reward_matrix = np.array(alg_instances)

        # One running total per instance, only advanced when @cumulative is set.
        running_total = np.zeros(instance_count)
        alg_intervals = []

        for episode_idx in range(episode_count):
            # Column = this episode's value from every instance.
            sample = reward_matrix[:, episode_idx]

            if cumulative:
                running_total = np.add(sample, running_total)
                sample = running_total

            alg_intervals.append(compute_single_conf_interval(sample))

        intervals_per_alg.append(alg_intervals)

    return intervals_per_alg
def compute_single_conf_interval(datum):
    '''
    Args:
        datum (list): A vector of data points to compute the confidence interval of.

    Returns:
        (float): Margin of error, i.e. 1.96 times the standard deviation of
            @datum divided by the square root of its length.
    '''
    spread = np.std(datum)
    sample_size = len(datum)

    # 1.96 is the multiplier conventionally used for a 95% normal interval.
    return 1.96 * (spread / math.sqrt(sample_size))
def _format_title(plot_title): plot_title = plot_title.replace("_", " ") plot_title = plot_title.replace("-", " ") if len(plot_title.split(" ")) > 1: plot_title_final = " ".join([w[0].upper() + w[1:] for w in plot_title.strip().split(" ")]) return plot_title_final
def plot(results, experiment_dir, agents, plot_file_name="", conf_intervals=None, use_cost=False, cumulative=False, episodic=True, open_plot=True, track_disc_reward=False):
    '''
    Args:
        results (list of lists): each element is itself the reward from an episode for an algorithm.
        experiment_dir (str): path to results.
        agents (list): each element is an agent that was run in the experiment.
        plot_file_name (str): name (without extension) of the pdf to write inside @experiment_dir.
            If empty, the name is built from the plot type, e.g. "cumulative_reward.pdf".
        conf_intervals (list of floats) [optional]: confidence intervals to display with the chart.
            None or an empty list means no intervals are drawn.
        use_cost (bool) [optional]: If true, plots are in terms of cost. Otherwise, plots are in terms of reward.
        cumulative (bool) [optional]: If true, plots are cumulative cost/reward.
        episodic (bool): If true, labels the x-axis "Episode Number". Otherwise, "Step Number".
        open_plot (bool): If true, tries to open the saved pdf with the platform's opener.
        track_disc_reward (bool): If true, plots discounted reward.

    Summary:
        Makes (and opens) a single reward chart plotting all of the data in @results.
    '''

    # Set x-axis labels to be integers.
    from matplotlib.ticker import MaxNLocator
    ax = pyplot.figure().gca()
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Some nice markers and colors for plotting.
    markers = ['o', 's', 'D', '^', '*', 'x', 'p', '+', 'v','|']

    x_axis_unit = "episode" if episodic else "step"

    # Map them to floats in [0:1].
    colors = [[shade / 255.0 for shade in rgb] for rgb in color_ls]

    # Puts the legend into the best location in the plot and use a tight layout.
    pyplot.rcParams['legend.loc'] = 'best'

    # Negate everything if we're plotting cost.
    if use_cost:
        results = [[-x for x in alg] for alg in results]

    agent_colors = _get_agent_colors(experiment_dir, agents)
    has_conf_intervals = conf_intervals is not None and len(conf_intervals) > 0

    # Make the plot. For each agent.
    for i, agent_name in enumerate(agents):

        # Agents without a recorded color index fall back to their position.
        agent_color_index = i if agent_name not in agent_colors else agent_colors[agent_name]

        # Wrap around if we've gone over the available colors/markers.
        series_color = colors[agent_color_index % len(colors)]
        series_marker = markers[agent_color_index % len(markers)]

        y_axis = results[i]
        x_axis = list(drange(X_AXIS_START_VAL, X_AXIS_START_VAL + len(y_axis) * X_AXIS_INCREMENT, X_AXIS_INCREMENT))

        # Plot Confidence Intervals.
        if has_conf_intervals:
            alg_conf_interv = conf_intervals[i]
            top = np.add(y_axis, alg_conf_interv)
            bot = np.subtract(y_axis, alg_conf_interv)
            pyplot.fill_between(x_axis, top, bot, facecolor=series_color, edgecolor=series_color, alpha=0.25)
            print("\t" + str(agent_name) + ":", round(y_axis[-1], 5) , "(conf_interv:", round(alg_conf_interv[-1], 2), ")")

        # Roughly 30 markers per series. Floor division keeps this an int:
        # matplotlib treats a float markevery as a different spacing mode.
        marker_every = max(len(y_axis) // 30, 1)
        pyplot.plot(x_axis, y_axis, color=series_color, marker=series_marker, markevery=marker_every, label=agent_name)
        pyplot.legend()
    print()

    # Configure plot naming information.
    unit = "Cost" if use_cost else "Reward"
    plot_label = "Cumulative" if cumulative else "Average"
    if "times" in experiment_dir:
        # If it's a time plot.
        unit = "Time"
    disc_ext = "Discounted " if track_disc_reward else ""

    # Set names: the experiment name is the path component after 'results',
    # or the first path component if there is no 'results' component.
    exp_dir_split_list = experiment_dir.split("/")
    if 'results' in exp_dir_split_list:
        exp_name = exp_dir_split_list[exp_dir_split_list.index('results') + 1]
    else:
        exp_name = exp_dir_split_list[0]

    # Joining with "" guarantees a trailing separator for the concatenation below.
    experiment_dir = os.path.join(experiment_dir, "")
    if plot_file_name != "":
        plot_file_name = os.path.join(experiment_dir, plot_file_name + ".pdf")
    else:
        plot_file_name = experiment_dir + plot_label.lower() + "_" + unit.lower() + ".pdf"

    if CUSTOM_TITLE is not None:
        plot_title = CUSTOM_TITLE
    else:
        plot_title = _format_title(plot_label + " " + disc_ext + unit + ": " + exp_name)

    # Axis labels.
    x_axis_label = X_AXIS_LABEL if X_AXIS_LABEL is not None else x_axis_unit[0].upper() + x_axis_unit[1:] + " Number"
    y_axis_label = Y_AXIS_LABEL if Y_AXIS_LABEL is not None else plot_label + " " + unit

    # Pyplot calls.
    pyplot.xlabel(x_axis_label)
    pyplot.ylabel(y_axis_label)
    pyplot.title(plot_title)
    pyplot.grid(True)
    pyplot.tight_layout() # Keeps the spacing nice.

    # Save the plot.
    pyplot.savefig(plot_file_name, format="pdf")

    if open_plot:
        # Open it. Passing an argument list (no shell) keeps paths containing
        # spaces or shell metacharacters intact.
        opener = "gnome-open" if sys.platform.startswith("linux") else "open"
        try:
            subprocess.call([opener, plot_file_name])
        except OSError:
            # Opening is best-effort: the pdf has already been saved.
            print("Warning: could not run '" + opener + "' to open " + plot_file_name)

    # Clear and close.
    pyplot.cla()
    pyplot.close()
def make_plots(experiment_dir, experiment_agents, plot_file_name="", cumulative=True, use_cost=False, episodic=True, open_plot=True, track_disc_reward=False):
    '''
    Args:
        experiment_dir (str): path to results.
        experiment_agents (list): agent names (looks for "<agent-name>.csv").
        plot_file_name (str)
        cumulative (bool): If true, plots show cumulative reward/cost.
        use_cost (bool): If true, plots are in terms of cost. Otherwise, plots are in terms of reward.
        episodic (bool): If true, labels the x-axis "Episode Number". Otherwise, "Step Number".
        open_plot (bool): forwarded to plot().
        track_disc_reward (bool): If true, plots discounted reward (changes plot title, too).

    Summary:
        Loads the results of every agent, averages them, computes confidence
        intervals, and hands everything to plot().
    '''
    # Raw rewards, indexed [alg][instance][episode].
    raw_rewards = load_data(experiment_dir, experiment_agents)

    # Per-episode mean across instances, then the matching margins of error.
    mean_curves = average_data(raw_rewards, cumulative=cumulative)
    error_bands = compute_conf_intervals(raw_rewards, cumulative=cumulative)

    plot_options = {
        "plot_file_name": plot_file_name,
        "conf_intervals": error_bands,
        "use_cost": use_cost,
        "cumulative": cumulative,
        "episodic": episodic,
        "open_plot": open_plot,
        "track_disc_reward": track_disc_reward,
    }
    plot(mean_curves, experiment_dir, experiment_agents, **plot_options)
def drange(x_min, x_max, x_increment):
    '''
    Args:
        x_min (float): first value produced.
        x_max (float): exclusive upper bound.
        x_increment (float): step between consecutive values; must be positive.

    Returns:
        (generator): yields floats x_min, x_min + x_increment, ... while they are below x_max.

    Raises:
        ValueError: if @x_increment is not positive (raised when iteration starts).

    Notes:
        A range function for generating lists of floats. Based on code from stack overflow user Sam Bruns:
            https://stackoverflow.com/questions/16105485/unsupported-operand-types-for-float-and-decimal
    '''
    if x_increment <= 0:
        # A non-positive step would never reach x_max.
        raise ValueError("(simple_rl) Plotting Error: x_increment must be positive.")

    # Convert all three values through str() so they are compared and summed
    # as the decimal numbers the caller wrote. Converting a float directly
    # (or comparing a Decimal against a float) uses its binary expansion,
    # which can let one extra value slip under x_max.
    current = decimal.Decimal(str(x_min))
    stop = decimal.Decimal(str(x_max))
    step = decimal.Decimal(str(x_increment))

    while current < stop:
        yield float(current)
        current += step
def _get_agent_names(data_dir):
    '''
    Args:
        data_dir (str)

    Returns:
        (list): agent names. Read from the lines between the "Agents" and
            "Params" markers of the experiment parameter file if it can be
            opened; otherwise the names of the csv files found in @data_dir.
    '''
    from simple_rl.experiments import Experiment

    try:
        params_file = open(os.path.join(data_dir, Experiment.EXP_PARAM_FILE_NAME), "r")
    except IOError:
        # No param file.
        return [agent_file.replace(".csv", "") for agent_file in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, agent_file)) and ".csv" in agent_file]

    agent_names = []
    agent_flag = False

    with params_file:
        for line in params_file:
            if "Agents" in line:
                agent_flag = True
                continue
            if "Params" in line:
                agent_flag = False
            if agent_flag:
                # The name is everything before the first comma.
                agent_name = line.split(",")[0].strip()
                if agent_name:
                    # Skip blank lines so they do not become empty agent names.
                    agent_names.append(agent_name)

    return agent_names


def _get_agent_colors(data_dir, agents):
    '''
    Args:
        data_dir (str)
        agents (list)

    Returns:
        (dict): maps agent --> color index (int). If the experiment parameter
            file cannot be opened, every agent maps to its position in @agents.
            Otherwise only agents with a parsable entry in the file are included.
    '''
    from simple_rl.experiments import Experiment

    try:
        params_file = open(os.path.join(data_dir, Experiment.EXP_PARAM_FILE_NAME), "r")
    except IOError:
        # No param file.
        return {agent : i for i, agent in enumerate(agents)}

    colors = {}

    with params_file:
        for line in params_file:
            fields = line.strip().split(",")
            matching_agents = [agent_name for agent_name in agents if agent_name == fields[0]]
            if len(fields) < 2 or not matching_agents:
                continue

            # NOTE(review): assumes entries look like "<agent>,<color index>" -- confirm
            # against the code that writes the parameter file. Parsing the whole last
            # field handles multi-digit indices and a missing trailing newline.
            try:
                color_index = int(fields[-1])
            except ValueError:
                # Not a color entry; plot() falls back to the agent's position.
                continue

            for agent_name in matching_agents:
                colors[agent_name] = color_index

    return colors


def _is_episodic(data_dir):
    '''
    Args:
        data_dir (str)

    Returns:
        (bool) True iff the experiment was episodic, i.e. the first line of the
            experiment parameter file mentioning "episodes" holds a value
            greater than 1. False if the file or such a line is missing.
    '''
    from simple_rl.experiments import Experiment

    # Open param file for the experiment.
    params_path = os.path.join(data_dir, Experiment.EXP_PARAM_FILE_NAME)
    if not os.path.exists(params_path):
        print("Warning: no experiment parameters file found for experiment. Assuming non-episodic.")
        return False

    with open(params_path, "r") as params_file:
        # Check if episodes > 1.
        for line in params_file:
            if "episodes" in line:
                vals = line.strip().split(":")
                return int(vals[1]) > 1

    # No "episodes" entry at all.
    return False


def _is_disc_reward(data_dir):
    '''
    Args:
        data_dir (str)

    Returns:
        (bool) True iff the experiment recorded discounted reward, i.e. the
            experiment parameter file has a "track_disc_reward" line whose
            value is "True".
    '''
    from simple_rl.experiments import Experiment

    # Open param file for the experiment.
    params_path = os.path.join(data_dir, Experiment.EXP_PARAM_FILE_NAME)
    if not os.path.exists(params_path):
        print("Warning: no experiment parameters file found for experiment. Assuming undiscounted reward.")
        return False

    with open(params_path, "r") as params_file:
        for line in params_file:
            if "track_disc_reward" in line:
                vals = line.strip().split(":")
                if len(vals) > 1 and "True" == vals[1].strip():
                    return True

    return False
def _str_to_bool(value):
    '''
    Args:
        value (str or bool): command line text such as "True", "false", "1" or "no".

    Returns:
        (bool)

    Raises:
        argparse.ArgumentTypeError: if @value is not a recognized boolean spelling.

    Summary:
        argparse converter for boolean options. Plain bool() cannot be used
        because every non-empty string, including "False", is truthy.
    '''
    if isinstance(value, bool):
        return value

    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got: " + repr(value))


def parse_args():
    '''
    Summary:
        Parses two arguments, 'dir' (directory pointer) and 'a' (bool to indicate avg. plot).
        '-a' may be given with an explicit value ("-a True", "-a False") or on
        its own, in which case it counts as true.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("-dir", type = str, help = "Path to relevant csv files of data.")
    parser.add_argument("-a", type = _str_to_bool, nargs = "?", const = True, default = False, help = "If true, plots average reward (default is cumulative).")
    return parser.parse_args()
def main():
    '''
    Summary:
        For manual plotting. Reads the data directory from the command line,
        works out which agents were run and how the experiment was configured,
        and creates the plot.

    Raises:
        ValueError: if no data directory was given or no agents were found in it.
    '''

    # Parse args.
    args = parse_args()

    # Grab agents.
    data_dir = args.dir
    if not data_dir:
        # '-dir' is optional for the parser, so fail here with a clear message
        # instead of an opaque error from the path handling below.
        raise ValueError("Error: no data directory given (use -dir).")

    agent_names = _get_agent_names(data_dir)
    if len(agent_names) == 0:
        raise ValueError("Error: no csv files found.")

    if not data_dir.endswith("/"):
        data_dir = data_dir + "/"

    # '-a' asks for the average plot; cumulative is the default.
    cumulative = not(args.a)
    episodic = _is_episodic(data_dir)
    track_disc_reward = _is_disc_reward(data_dir)

    # Plot.
    make_plots(data_dir, agent_names, cumulative=cumulative, episodic=episodic, track_disc_reward=track_disc_reward)
# Run the manual plotting entry point only when executed as a script, not on import.
if __name__ == "__main__": main()