% NIPS2017.bib
@article{sutton_between_1999,
title = {Between {MDPs} and semi-{MDPs}: {A} framework for temporal abstraction in reinforcement learning},
volume = {112},
issn = {0004-3702},
shorttitle = {Between {MDPs} and semi-{MDPs}},
doi = {10.1016/S0004-3702(99)00052-1},
abstract = {Learning, planning, and representing knowledge at multiple levels of temporal abstraction are key, longstanding challenges for AI. In this paper we consider how these challenges can be addressed within the mathematical framework of reinforcement learning and Markov decision processes (MDPs). We extend the usual notion of action in this framework to include options—closed-loop policies for taking action over a period of time. Examples of options include picking up an object, going to lunch, and traveling to a distant city, as well as primitive actions such as muscle twitches and joint torques. Overall, we show that options enable temporally abstract knowledge and action to be included in the reinforcement learning framework in a natural and general way. In particular, we show that options may be used interchangeably with primitive actions in planning methods such as dynamic programming and in learning methods such as Q-learning. Formally, a set of options defined over an MDP constitutes a semi-Markov decision process (SMDP), and the theory of SMDPs provides the foundation for the theory of options. However, the most interesting issues concern the interplay between the underlying MDP and the SMDP and are thus beyond SMDP theory. We present results for three such cases: (1) we show that the results of planning with options can be used during execution to interrupt options and thereby perform even better than planned, (2) we introduce new intra-option methods that are able to learn about an option from fragments of its execution, and (3) we propose a notion of subgoal that can be used to improve the options themselves. All of these results have precursors in the existing literature; the contribution of this paper is to establish them in a simpler and more general setting with fewer changes to the existing reinforcement learning framework. In particular, we show that these results can be obtained without committing to (or ruling out) any particular approach to state abstraction, hierarchy, function approximation, or the macro-utility problem.},
number = {1},
urldate = {2016-12-15},
journal = {Artificial Intelligence},
author = {Sutton, R. and Precup, D. and Singh, S.},
month = aug,
year = {1999},
keywords = {Hierarchical planning, Intra-option learning, Macroactions, Macros, Markov decision processes, Options, reinforcement learning, Semi-Markov decision processes, Subgoals, Temporal abstraction},
pages = {181--211}
}
@book{sutton_reinforcement_2017,
address = {Cambridge, MA; London, England},
edition = {2},
title = {Reinforcement {Learning}: {An} {Introduction}},
publisher = {MIT Press},
author = {Sutton, R. S. and Barto, A. G.},
year = {2017},
}
@article{machado_learning_2016,
title = {Learning {Purposeful} {Behaviour} in the {Absence} of {Rewards}},
abstract = {Artificial intelligence is commonly defined as the ability to achieve goals in the world. In the reinforcement learning framework, goals are encoded as reward functions that guide agent behaviour, and the sum of observed rewards provide a notion of progress. However, some domains have no such reward signal, or have a reward signal so sparse as to appear absent. Without reward feedback, agent behaviour is typically random, often dithering aimlessly and lacking intentionality. In this paper we present an algorithm capable of learning purposeful behaviour in the absence of rewards. The algorithm proceeds by constructing temporally extended actions (options), through the identification of purposes that are "just out of reach" of the agent's current behaviour. These purposes establish intrinsic goals for the agent to learn, ultimately resulting in a suite of behaviours that encourage the agent to visit different parts of the state space. Moreover, the approach is particularly suited for settings where rewards are very sparse, and such behaviours can help in the exploration of the environment until reward is observed.},
urldate = {2017-05-13},
journal = {arXiv:1605.07700 [cs]},
author = {Machado, M. C. and Bowling, M.},
month = may,
year = {2016},
note = {arXiv: 1605.07700},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Learning},
}
@article{tessler_deep_2016,
title = {A {Deep} {Hierarchical} {Approach} to {Lifelong} {Learning} in {Minecraft}},
abstract = {We propose a lifelong learning system that has the ability to reuse and transfer knowledge from one task to another while efficiently retaining the previously learned knowledge-base. Knowledge is transferred by learning reusable skills to solve tasks in Minecraft, a popular video game which is an unsolved and high-dimensional lifelong learning problem. These reusable skills, which we refer to as Deep Skill Networks, are then incorporated into our novel Hierarchical Deep Reinforcement Learning Network (H-DRLN) architecture using two techniques: (1) a deep skill array and (2) skill distillation, our novel variation of policy distillation (Rusu et. al. 2015) for learning skills. Skill distillation enables the HDRLN to efficiently retain knowledge and therefore scale in lifelong learning, by accumulating knowledge and encapsulating multiple reusable skills into a single distilled network. The H-DRLN exhibits superior performance and lower learning sample complexity compared to the regular Deep Q Network (Mnih et. al. 2015) in sub-domains of Minecraft.},
urldate = {2017-05-13},
journal = {arXiv:1604.07255 [cs]},
author = {Tessler, Chen and Givony, Shahar and Zahavy, Tom and Mankowitz, Daniel J. and Mannor, Shie},
month = apr,
year = {2016},
note = {arXiv: 1604.07255},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Learning},
}
@article{wittmann_striatal_2008,
title = {Striatal {Activity} {Underlies} {Novelty}-{Based} {Choice} in {Humans}},
volume = {58},
issn = {0896-6273},
doi = {10.1016/j.neuron.2008.04.027},
abstract = {The desire to seek new and unfamiliar experiences is a fundamental behavioral tendency in humans and other species. In economic decision making, novelty seeking is often rational, insofar as uncertain options may prove valuable and advantageous in the long run. Here, we show that, even when the degree of perceptual familiarity of an option is unrelated to choice outcome, novelty nevertheless drives choice behavior. Using functional magnetic resonance imaging (fMRI), we show that this behavior is specifically associated with striatal activity, in a manner consistent with computational accounts of decision making under uncertainty. Furthermore, this activity predicts interindividual differences in susceptibility to novelty. These data indicate that the brain uses perceptual novelty to approximate choice uncertainty in decision making, which in certain contexts gives rise to a newly identified and quantifiable source of human irrationality.},
number = {6},
urldate = {2017-05-13},
journal = {Neuron},
author = {Wittmann, B. C. and Daw, N. D. and Seymour, B. and Dolan, R. J.},
month = jun,
year = {2008},
pmid = {18579085},
pmcid = {PMC2535823},
pages = {967--973}
}
@article{botvinick_model-based_2014,
title = {Model-based hierarchical reinforcement learning and human action control},
volume = {369},
copyright = {© 2014 The Authors. Published by the Royal Society under the terms of the Creative Commons Attribution License http://creativecommons.org/licenses/by/4.0/, which permits unrestricted use, provided the original author and source are credited.},
issn = {0962-8436, 1471-2970},
doi = {10.1098/rstb.2013.0480},
abstract = {Recent work has reawakened interest in goal-directed or ‘model-based’ choice, where decisions are based on prospective evaluation of potential action outcomes. Concurrently, there has been growing attention to the role of hierarchy in decision-making and action control. We focus here on the intersection between these two areas of interest, considering the topic of hierarchical model-based control. To characterize this form of action control, we draw on the computational framework of hierarchical reinforcement learning, using this to interpret recent empirical findings. The resulting picture reveals how hierarchical model-based mechanisms might play a special and pivotal role in human decision-making, dramatically extending the scope and complexity of human behaviour.},
language = {en},
number = {1655},
urldate = {2017-05-13},
journal = {Philosophical Transactions of the Royal Society B: Biological Sciences},
author = {Botvinick, M. and Weinstein, A.},
month = nov,
year = {2014},
pmid = {25267822},
pages = {20130480},
}
@article{anderson_act:_1996,
title = {{ACT}: {A} simple theory of complex cognition.},
volume = {51},
shorttitle = {{ACT}},
number = {4},
journal = {American Psychologist},
author = {Anderson, John R.},
year = {1996},
pages = {355--365}
}
@article{collins_reasoning_2012,
title = {Reasoning, {Learning}, and {Creativity}: {Frontal} {Lobe} {Function} and {Human} {Decision}-{Making}},
volume = {10},
issn = {1545-7885},
shorttitle = {Reasoning, {Learning}, and {Creativity}},
doi = {10.1371/journal.pbio.1001293},
abstract = {Computational modeling and behavioral experimentation suggest that human frontal lobe function is capable of monitoring three or four concurrent behavioral strategies in order to select the most suitable one during decision-making.},
number = {3},
urldate = {2017-10-31},
journal = {PLOS Biology},
author = {Collins, A. and Koechlin, E.},
month = mar,
year = {2012},
keywords = {Behavior, Decision Making, Entropy, Human performance, Learning, Long-term memory, Reliability, Sensory cues},
pages = {e1001293},
}
@article{miller_integrative_2001,
title = {An integrative theory of prefrontal cortex function},
volume = {24},
issn = {0147-006X},
doi = {10.1146/annurev.neuro.24.1.167},
abstract = {The prefrontal cortex has long been suspected to play an important role in cognitive control, in the ability to orchestrate thought and action in accordance with internal goals. Its neural basis, however, has remained a mystery. Here, we propose that cognitive control stems from the active maintenance of patterns of activity in the prefrontal cortex that represent goals and the means to achieve them. They provide bias signals to other brain structures whose net effect is to guide the flow of activity along neural pathways that establish the proper mappings between inputs, internal states, and outputs needed to perform a given task. We review neurophysiological, neurobiological, neuroimaging, and computational studies that support this theory and discuss its implications as well as further issues to be addressed},
language = {en},
journal = {Annual Review of Neuroscience},
author = {Miller, E. K. and Cohen, J. D.},
year = {2001},
pmid = {11283309},
keywords = {Animals, Attention, COGNITION, Humans, Memory, Models, Neurological, Neural Pathways, Neurons, Prefrontal Cortex},
pages = {167--202}
}
@article{frank_mechanisms_2012,
title = {Mechanisms of {Hierarchical} {Reinforcement} {Learning} in {Corticostriatal} {Circuits} 1: {Computational} {Analysis}},
volume = {22},
issn = {1047-3211},
shorttitle = {Mechanisms of {Hierarchical} {Reinforcement} {Learning} in {Corticostriatal} {Circuits} 1},
doi = {10.1093/cercor/bhr114},
abstract = {Growing evidence suggests that the prefrontal cortex (PFC) is organized hierarchically, with more anterior regions having increasingly abstract representations. How does this organization support hierarchical cognitive control and the rapid discovery of abstract action rules? We present computational models at different levels of description. A neural circuit model simulates interacting corticostriatal circuits organized hierarchically. In each circuit, the basal ganglia gate frontal actions, with some striatal units gating the inputs to PFC and others gating the outputs to influence response selection. Learning at all of these levels is accomplished via dopaminergic reward prediction error signals in each corticostriatal circuit. This functionality allows the system to exhibit conditional if–then hypothesis testing and to learn rapidly in environments with hierarchical structure. We also develop a hybrid Bayesian-reinforcement learning mixture of experts (MoE) model, which can estimate the most likely hypothesis state of individual participants based on their observed sequence of choices and rewards. This model yields accurate probabilistic estimates about which hypotheses are attended by manipulating attentional states in the generative neural model and recovering them with the MoE model. This 2-pronged modeling approach leads to multiple quantitative predictions that are tested with functional magnetic resonance imaging in the companion paper.},
number = {3},
urldate = {2017-10-31},
journal = {Cerebral Cortex},
author = {Frank, M. J. and Badre, D.},
month = mar,
year = {2012},
pages = {509--526},
}
@article{chase_perception_1973,
title = {Perception in chess},
volume = {4},
issn = {0010-0285},
doi = {10.1016/0010-0285(73)90004-2},
abstract = {This paper develops a technique for isolating and studying the perceptual structures that chess players perceive. Three chess players of varying strength — from master to novice — were confronted with two tasks: (1) A perception task, where the player reproduces a chess position in plain view, and (2) de Groot's (1965) short-term recall task, where the player reproduces a chess position after viewing it for 5 sec. The successive glances at the position in the perceptual task and long pauses in the memory task were used to segment the structures in the reconstruction protocol. The size and nature of these structures were then analyzed as a function of chess skill.},
number = {1},
journal = {Cognitive Psychology},
author = {Chase, W. G. and Simon, H. A.},
month = jan,
year = {1973},
pages = {55--81},
}
@article{gopnik_scientific_2012,
title = {Scientific {Thinking} in {Young} {Children}: {Theoretical} {Advances}, {Empirical} {Research}, and {Policy} {Implications}},
volume = {337},
copyright = {Copyright © 2012, American Association for the Advancement of Science},
issn = {0036-8075, 1095-9203},
shorttitle = {Scientific {Thinking} in {Young} {Children}},
doi = {10.1126/science.1223416},
abstract = {New theoretical ideas and empirical research show that very young children’s learning and thinking are strikingly similar to much learning and thinking in science. Preschoolers test hypotheses against data and make causal inferences; they learn from statistics and informal experimentation, and from watching and listening to others. The mathematical framework of probabilistic models and Bayesian inference can describe this learning in precise ways. These discoveries have implications for early childhood education and policy. In particular, they suggest both that early childhood experience is extremely important and that the trend toward more structured and academic early childhood programs is misguided.},
language = {en},
number = {6102},
urldate = {2017-10-31},
journal = {Science},
author = {Gopnik, A.},
month = sep,
year = {2012},
pmid = {23019643},
pages = {1623--1627},
}
@article{schmidhuber_formal_2010,
title = {Formal {Theory} of {Creativity}, {Fun}, and {Intrinsic} {Motivation} (1990--2010)},
volume = {2},
issn = {1943-0604},
doi = {10.1109/TAMD.2010.2056368},
abstract = {The simple, but general formal theory of fun and intrinsic motivation and creativity (1990-2010) is based on the concept of maximizing intrinsic reward for the active creation or discovery of novel, surprising patterns allowing for improved prediction or data compression. It generalizes the traditional field of active learning, and is related to old, but less formal ideas in aesthetics theory and developmental psychology. It has been argued that the theory explains many essential aspects of intelligence including autonomous development, science, art, music, and humor. This overview first describes theoretically optimal (but not necessarily practical) ways of implementing the basic computational principles on exploratory, intrinsically motivated agents or robots, encouraging them to provoke event sequences exhibiting previously unknown, but learnable algorithmic regularities. Emphasis is put on the importance of limited computational resources for online prediction and compression. Discrete and continuous time formulations are given. Previous practical, but nonoptimal implementations (1991, 1995, and 1997-2002) are reviewed, as well as several recent variants by others (2005-2010). A simplified typology addresses current confusion concerning the precise nature of intrinsic motivation.},
number = {3},
journal = {IEEE Transactions on Autonomous Mental Development},
author = {Schmidhuber, J.},
month = sep,
year = {2010},
keywords = {active learning, aesthetics, aesthetics theory, Art, Attention, COGNITION, Computational intelligence, creativity, data compression, developmental psychology, event sequences, Eyes, Feedback, Fingers, formal theory of creativity, fun, humor, Intelligent robots, intrinsic motivation, intrinsic reward, limited computational resources, music, novel patterns, novelty, Pediatrics, Predictive models, Psychology, science, surprise, typology of intrinsic motivation},
pages = {230--247},
}
@article{schultz_neural_1997,
title = {A {Neural} {Substrate} of {Prediction} and {Reward}},
volume = {275},
copyright = {© 1997 American Association for the Advancement of Science},
issn = {0036-8075, 1095-9203},
doi = {10.1126/science.275.5306.1593},
abstract = {The capacity to predict future events permits a creature to detect, model, and manipulate the causal structure of its interactions with its environment. Behavioral experiments suggest that learning is driven by changes in the expectations about future salient events such as rewards and punishments. Physiological work has recently complemented these studies by identifying dopaminergic neurons in the primate whose fluctuating output apparently signals changes or errors in the predictions of future salient and rewarding events. Taken together, these findings can be understood through quantitative theories of adaptive optimizing control.},
language = {en},
number = {5306},
urldate = {2017-10-31},
journal = {Science},
author = {Schultz, Wolfram and Dayan, Peter and Montague, P. Read},
month = mar,
year = {1997},
pmid = {9054347},
pages = {1593--1599},
}
@article{lepper_undermining_1973,
title = {Undermining {Children}'s {Intrinsic} {Interest} with {Extrinsic} {Reward}: {A} {Test} of the "{Overjustification}" {Hypothesis}},
volume = {28},
journal = {Journal of Personality and Social Psychology},
author = {Lepper, Mark R. and Greene, David and Nisbett, Richard E.},
year = {1973}
}
@inproceedings{chentanez_intrinsically_2005,
title = {Intrinsically motivated reinforcement learning},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems}},
author = {Chentanez, Nuttapong and Barto, Andrew G. and Singh, Satinder P.},
year = {2005},
pages = {1281--1288}
}
@article{pathak_curiosity-driven_2017,
title = {Curiosity-driven exploration by self-supervised prediction},
journal = {arXiv:1705.05363 [cs]},
note = {arXiv: 1705.05363},
author = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A. and Darrell, Trevor},
year = {2017}
}
@inproceedings{kulkarni_hierarchical_2016,
title = {Hierarchical deep reinforcement learning: {Integrating} temporal abstraction and intrinsic motivation},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems}},
author = {Kulkarni, Tejas D. and Narasimhan, Karthik and Saeedi, Ardavan and Tenenbaum, Josh},
year = {2016},
pages = {3675--3683}
}