cipher982 · January 3, 2019 20:22
diff --git a/sample_replay_buffer.py b/sample_replay_buffer.py
 def sample(self):
    """Draw a random minibatch from memory and return it as tensors.

    ``self.batch_size`` entries are picked from ``self.memory`` with
    ``random.sample``; entries that are ``None`` are dropped.  Each field
    is stacked row-wise with ``np.vstack``, converted to a tensor and moved
    to ``device``.

    Returns:
        Tuple ``(states, actions, rewards, next_states, dones)``.
        ``actions`` goes through ``.long()``; all other tensors through
        ``.float()``.  ``dones`` is cast to ``np.uint8`` first.
    """
    drawn = random.sample(self.memory, k=self.batch_size)
    # Keep only real entries; every field below uses this same list.
    batch = [exp for exp in drawn if exp is not None]

    state_rows = np.vstack([exp.state for exp in batch])
    states = torch.from_numpy(state_rows).float().to(device)

    action_rows = np.vstack([exp.action for exp in batch])
    actions = torch.from_numpy(action_rows).long().to(device)

    reward_rows = np.vstack([exp.reward for exp in batch])
    rewards = torch.from_numpy(reward_rows).float().to(device)

    next_state_rows = np.vstack([exp.next_state for exp in batch])
    next_states = torch.from_numpy(next_state_rows).float().to(device)

    # from_numpy is fed uint8 rather than the raw stacked values.
    done_rows = np.vstack([exp.done for exp in batch]).astype(np.uint8)
    dones = torch.from_numpy(done_rows).float().to(device)

    return (states, actions, rewards, next_states, dones)
	def sample(self):
		"""Randomly sample a batch of experiences from memory.

		Draws ``self.batch_size`` entries from ``self.memory`` via
		``random.sample``, skips ``None`` entries, and stacks each field
		row-wise into a tensor placed on ``device``.

		Returns:
			Tuple ``(states, actions, rewards, next_states, dones)``.
			``actions`` is converted with ``.long()``; every other tensor
			with ``.float()``.  ``dones`` is cast to ``np.uint8`` before
			the float conversion.

		Raises:
			ValueError: propagated from ``random.sample`` when
				``self.memory`` has fewer than ``self.batch_size`` entries.
		"""
		# Filter once so all five fields are built from the same kept entries.
		experiences = [
			e
			for e in random.sample(self.memory, k=self.batch_size)
			if e is not None
		]

		states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(device)
		actions = torch.from_numpy(np.vstack([e.action for e in experiences])).long().to(device)
		rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(device)
		next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(device)
		dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(device)

		return (states, actions, rewards, next_states, dones)