Publication List
@article{zhao2024zeromimic,
title={ZeroMimic: Distilling Robotic Manipulation Skills from Web Videos},
author={Zhuolun Zhao* and Junyao Shi* and Tianyou Wang and Jason Ma and Dinesh Jayaraman},
abstract={Many recent advances in robotic manipulation have come through imitation learning, yet these rely largely on mimicking a particularly hard-to-acquire form of demonstrations: those collected on the same robot in the same room with the same objects as the trained policy must handle at test time. In contrast, large pre-recorded human video datasets demonstrating manipulation skills in-the-wild already exist, which contain valuable information for robots. Is it possible to distill a repository of useful robotic skill policies out of such data without any additional requirements on robot-specific demonstrations or exploration? We present the first such system, ZeroMimic, which generates immediately deployable image goal-conditioned skill policies for several common categories of manipulation tasks (opening, closing, pouring, pick\&place, cutting, and stirring), each capable of acting upon diverse objects and across diverse unseen task setups. ZeroMimic is carefully designed to exploit recent advances in semantic and geometric visual understanding of human videos, together with modern grasp affordance detectors and imitation policy classes. After training ZeroMimic on the popular EpicKitchens dataset of egocentric human videos, we evaluate its out-of-the-box performance in varied kitchen settings, demonstrating its impressive abilities to handle these varied tasks. To enable plug-and-play reuse of ZeroMimic policies on other task setups and robots, we will release software and policy checkpoints for all skills.},
journal={(under review)},
year={2024}
}
@article{welde2024leveraging,
title={Leveraging Symmetry to Accelerate Learning of Trajectory Tracking Controllers for Free-Flying Robotic Systems},
author={Jake Welde* and Nishanth Rao* and Pratik Kunapuli* and Dinesh Jayaraman and Vijay Kumar},
abstract={Tracking controllers enable robotic systems to accurately follow planned reference trajectories. In particular, reinforcement learning (RL) has shown promise in the synthesis of controllers for systems with complex dynamics and modest online compute budgets. However, the poor sample efficiency of RL and the challenges of reward design make training slow and sometimes unstable, especially for high-dimensional systems. In this work, we leverage the inherent Lie group symmetries of robotic systems with a floating base to mitigate these challenges when learning tracking controllers. We model a general tracking problem as a Markov decision process (MDP) that captures the evolution of both the physical and reference states. Next, we prove that symmetry in the underlying dynamics and running costs leads to an MDP homomorphism, a mapping that allows a policy trained on a lower-dimensional “quotient” MDP to be lifted to an optimal tracking controller for the original system. We compare this symmetry-informed approach to an unstructured baseline, using Proximal Policy Optimization (PPO) to learn tracking controllers for three systems: the Particle (a forced point mass), the Astrobee (a fully-actuated space robot), and the Quadrotor (an underactuated system). Results show that a symmetry-aware approach both accelerates training and reduces tracking error after the same number of training steps.},
journal={(under review)},
year={2024}
}
@article{liang2024eurekaverse,
title = {Environment Curriculum Generation via Large Language Models},
author = {Will Liang and Sam Wang and Hungju Wang and Yecheng Jason Ma and Osbert Bastani and Dinesh Jayaraman},
abstract={Recent work has demonstrated that a promising strategy for teaching robots a wide range of complex skills is by training them on a curriculum of progressively more challenging environments. However, developing an effective curriculum of environment distributions currently requires significant expertise, which must be repeated for every new domain. Our key insight is that environments are often naturally represented as code. Thus, we probe whether effective environment curriculum design can be achieved and automated via code generation by large language models (LLM). In this paper, we introduce Eurekaverse, an unsupervised environment design algorithm that uses LLMs to sample progressively more challenging, diverse, and learnable environments for skill training. We validate Eurekaverse's effectiveness in the domain of quadrupedal parkour learning, in which a quadruped robot must traverse through a variety of obstacle courses. The automatic curriculum designed by Eurekaverse enables gradual learning of complex parkour skills in simulation and can successfully transfer to the real-world, outperforming manual training courses designed by humans.},
journal={CORL},
year={2024}
}
@article{qian2024hodor,
title={Task-Oriented Hierarchical Object Decomposition for Visuomotor Control},
author = {Jianing Qian and Bernadette Bucher and Dinesh Jayaraman},
abstract={Good pre-trained visual representations could enable robots to learn visuomotor policy efficiently. Still, existing representations take a one-size-fits-all-tasks approach that comes with two important drawbacks: (1) Being completely task-agnostic, these representations cannot effectively ignore any task-irrelevant information in the scene, and (2) They often lack the representational capacity to handle unconstrained/complex real-world scenes. Instead, we propose to train a large combinatorial family of representations organized by scene entities: objects and object parts. This \underline{h}ierarchical \underline{o}bject \underline{d}ecomposition for task-\underline{o}riented \underline{r}epresentations (\methodname) permits selectively assembling different representations specific to each task while scaling in representational capacity with the complexity of the scene and the task. In our experiments, we find that \methodname outperforms prior pre-trained representations, both scene vector representations and object-centric representations, for sample-efficient imitation learning across 5 simulated and 5 real-world manipulation tasks. We further find that the invariances captured in \methodname are inherited into downstream policies, which can robustly generalize to out-of-distribution test conditions, permitting zero-shot skill chaining. Appendix and videos: https://sites.google.com/view/hodor-corl24.},
journal={CORL},
year={2024}
}
@article{open_x_embodiment_rt_x_2024,
title={Open {X-E}mbodiment: Robotic Learning Datasets and {RT-X} Models},
author = {Large collaboration},
journal = {ICRA},
year = {2024},
url = {https://robotics-transformer-x.github.io/}
}
@article{ma2024dreureka,
title = {DrEureka: Language Model Guided Sim-To-Real Transfer},
author = {Yecheng Jason Ma and William Liang and Hungju Wang and Sam Wang and Yuke Zhu and Linxi Fan and Osbert Bastani and Dinesh Jayaraman},
year = {2024},
journal = {RSS}
}
@article{khazatsky2024droid,
title={DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset},
author={Alexander Khazatsky and Karl Pertsch and Suraj Nair and Ashwin Balakrishna and Sudeep Dasari and Siddharth Karamcheti and Soroush Nasiriany and Mohan Kumar Srirama and Lawrence Yunliang Chen and Kirsty Ellis and Peter David Fagan and Joey Hejna and Masha Itkina and Marion Lepert and Yecheng Jason Ma and Patrick Tree Miller and Jimmy Wu and Suneel Belkhale and Shivin Dass and Huy Ha and Arhan Jain and Abraham Lee and Youngwoon Lee and Marius Memmel and Sungjae Park and Ilija Radosavovic and Kaiyuan Wang and Albert Zhan and Kevin Black and Cheng Chi and Kyle Beltran Hatch and Shan Lin and Jingpei Lu and Jean Mercat and Abdul Rehman and Pannag R Sanketi and Archit Sharma and Cody Simpson and Quan Vuong and Homer Rich Walke and Blake Wulfe and Ted Xiao and Jonathan Heewon Yang and Arefeh Yavary and Tony Z. Zhao and Christopher Agia and Rohan Baijal and Mateo Guaman Castro and Daphne Chen and Qiuyu Chen and Trinity Chung and Jaimyn Drake and Ethan Paul Foster and Jensen Gao and David Antonio Herrera and Minho Heo and Kyle Hsu and Jiaheng Hu and Donovon Jackson and Charlotte Le and Yunshuang Li and Kevin Lin and Roy Lin and Zehan Ma and Abhiram Maddukuri and Suvir Mirchandani and Daniel Morton and Tony Nguyen and Abigail O'Neill and Rosario Scalise and Derick Seale and Victor Son and Stephen Tian and Emi Tran and Andrew E. Wang and Yilin Wu and Annie Xie and Jingyun Yang and Patrick Yin and Yunchu Zhang and Osbert Bastani and Glen Berseth and Jeannette Bohg and Ken Goldberg and Abhinav Gupta and Abhishek Gupta and Dinesh Jayaraman and Joseph J Lim and Jitendra Malik and Roberto Martín-Martín and Subramanian Ramamoorthy and Dorsa Sadigh and Shuran Song and Jiajun Wu and Michael C. Yip and Yuke Zhu and Thomas Kollar and Sergey Levine and Chelsea Finn},
year={2024},
journal={RSS}
}
@article{stern2024physical,
title={Training self-learning circuits for power-efficient solutions},
author={Stern, Menachem and Dillavou, Sam and Jayaraman, Dinesh and Durian, Douglas J and Liu, Andrea J},
journal={Applied Physics Letters (APL) Machine Learning},
year={2024}
}
@article{zhang2024universal,
title={Universal Visual Decomposer: Long-Horizon Manipulation Made Easy},
author={Zichen Zhang and Yunshuang Li and Osbert Bastani and Abhishek Gupta and Dinesh Jayaraman and Yecheng Jason Ma and Luca Weihs},
year={2024},
journal={ICRA},
}
@article{qian2024soft,
title={Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies},
author={Jianing Qian and Anastasios Panagopoulos and Dinesh Jayaraman},
abstract={Generic re-usable pre-trained image representation encoders
have become a standard component of methods for many
computer vision tasks. As visual representations for robots
however, their utility has been limited, leading to a
recent wave of efforts to pre-train robotics-specific image
encoders that are better suited to robotic tasks than their
generic counterparts. We propose SOFT, a wrapper around
pre-trained vision transformer PVT models that bridges this
gap without any further training. Rather than construct
representations out of only the final layer activations,
SOFT individuates and locates object-like entities from PVT
attentions, and describes them with PVT activations,
producing an object-centric representation. Across standard
choices of generic pre-trained vision transformers PVT, we
demonstrate in each case that policies trained on SOFT(PVT)
far outstrip standard PVT representations for manipulation
tasks in simulated and real settings, approaching the
state-of-the-art robotics-aware representations.},
url={https://sites.google.com/view/robot-soft/},
journal={ICRA},
year={2024}
}
@article{shi2024plug,
title={Composing Pre-Trained Object-Centric Representations for Robotics From “What” and “Where” Foundation Models},
author={Shi*, Junyao and Qian*, Jianing and Ma, Yecheng Jason and Jayaraman, Dinesh},
abstract={There have recently been large advances both in pre-training visual representations for robotic control and segmenting unknown category objects in general images. To leverage these for improved robot learning, we propose POCR, a new framework for building pre-trained object-centric representations for robotic control. Building on theories of “what-where” representations in psychology and computer vision, we use segmentations from a pre-trained model to stably locate across timesteps, various entities in the scene, capturing “where” information. To each such segmented entity, we apply other pre-trained models that build vector descriptions suitable for robotic control tasks, thus capturing “what” the entity is. Thus, our pre-trained object-centric representations for control are constructed by appropriately combining the outputs of off-the-shelf pre-trained models, with no new training. On various simulated and real robotic tasks, we show that imitation policies for robotic manipulators trained on POCR achieve better performance and systematic generalization than state of the art pre-trained representations for robotics, as well as prior object-centric representations that are typically trained from scratch.},
url={https://sites.google.com/view/pocr},
journal={ICRA},
year={2024}
}
@article{narayanan2024long,
title={Long-HOT: A Modular Hierarchical Approach for Long-Horizon Object Transport},
author={Narayanan, Sriram and Jayaraman, Dinesh and Chandraker, Manmohan},
journal={ICRA},
abstract={We aim to address key challenges in long-horizon
embodied exploration and navigation by proposing a
long-horizon object transport task called Long-HOT and a
novel modular framework for temporally extended navigation.
Agents in Long-HOT need to efficiently find and pick up
target objects that are scattered in the environment, carry
them to a goal location with load constraints, and
optionally have access to a container. We propose a modular
topological graph-based transport policy (HTP) that
explores efficiently with the help of weighted frontiers.
Our approach uses a combination of motion planning to reach
point goals within explored locations and object navigation
policies for moving towards semantic targets at unknown
locations. Experiments on both our proposed Habitat
transport task and on MultiOn benchmarks show that our
method outperforms baselines and prior works. Further, we
analyze the agent's behavior for the usage of the container
and demonstrate meaningful generalization to much harder
transport scenes with training only on simpler versions of
the task. We will release all the code and data.},
year={2024}
}
@article{hu2024scaffolder,
title={Privileged Sensing Scaffolds Reinforcement Learning},
author={Edward Hu and James Springer and Oleh Rybkin and Dinesh Jayaraman},
journal={ICLR},
year={2024},
abstract={We need to look at our shoelaces as we first learn to tie them but having mastered this skill, can do it from touch alone. We call this phenomenon “sensory scaffolding”: observation streams that are not needed by a master might yet aid a novice learner. We consider such sensory scaffolding setups for training artificial agents. For example, a robot arm may need to be deployed with just a low-cost, robust, general-purpose camera; yet its performance may improve by having privileged training-time-only access to informative albeit expensive and unwieldy motion capture rigs or fragile tactile sensors. For these settings, we propose Scaffolder, a reinforcement learning approach which effectively exploits privileged sensing in critics, world models, reward estimators, and other such auxiliary components that are only used at training time, to improve the target policy. For evaluating sensory scaffolding agents, we design a new “S3” suite of ten diverse simulated robotic tasks that explore a wide range of practical sensor setups. Agents must use privileged camera sensing to train blind hurdlers, privileged active visual perception to help robot arms overcome visual occlusions, privileged touch sensors to train robot hands, and more. Scaffolder easily outperforms relevant prior baselines and frequently performs comparably even to policies that have test-time access to the privileged sensors.}
}
@article{wen2024relativit,
title={Can Transformers Capture Spatial Relations between Objects?},
author={Chuan Wen and Dinesh Jayaraman and Yang Gao},
journal={ICLR},
year={2024},
abstract={Spatial relationships between objects represent key scene information for humans to understand and interact with the world. To study the capability of current computer vision systems to recognize physically grounded spatial relations, we start by proposing precise relation definitions that permit consistently annotating a benchmark dataset. Despite the apparent simplicity of this task relative to others in the recognition literature, we observe that existing approaches perform poorly on this benchmark. We propose new approaches exploiting the long-range attention capabilities of transformers for this task, and evaluating key design principles. We identify a simple "RelatiViT" architecture and demonstrate that it outperforms all current approaches. To our knowledge, this is the first method to convincingly outperform naive baselines on spatial relation prediction in in-the-wild settings.}
}
@article{ma2024eureka,
title={Eureka: Human-Level Reward Design via Coding Large Language Models},
author={Yecheng Jason Ma and William Liang and Guanzhi Wang and De-An Huang and Osbert Bastani and Dinesh Jayaraman and Yuke Zhu and Linxi Fan and Anima Anandkumar},
year={2024},
journal={ICLR}
}
@article{sridhar2024memoryconsistent,
title={Memory-Consistent Neural Networks for Imitation Learning},
author={Kaustubh Sridhar and Souradeep Dutta and Dinesh Jayaraman and James Weimer and Insup Lee},
year={2024},
journal={ICLR}
}
@article{vedder2024zeroflow,
title={ZeroFlow: Fast Zero Label Scene Flow via Distillation},
author={Vedder, Kyle and Peri, Neehar and Chodosh, Nathaniel and Khatri, Ishan and Eaton, Eric and Jayaraman, Dinesh and Liu, Yang and Ramanan, Deva and Hays, James},
journal={ICLR},
year={2024}
}
@article{wan2024tlcontrol,
title={TLControl: Trajectory and Language Control for Human Motion Synthesis},
author={Weilin Wan and Zhiyang Dou and Taku Komura and Wenping Wang and Dinesh Jayaraman and Lingjie Liu},
year={2024},
abstract={Controllable human motion synthesis is essential for applications in AR/VR, gaming and embodied AI. Existing methods often focus solely on either language or full trajectory control, lacking precision in synthesizing motions aligned with user-specified trajectories, especially for multi-joint control. To address these issues, we present TLControl, a novel method for realistic human motion synthesis, incorporating both low-level Trajectory and high-level Language semantics controls, through the integration of neural-based and optimization-based techniques. Specifically, we begin with training a VQ-VAE for a compact and well-structured latent motion space organized by body parts. We then propose a Masked Trajectories Transformer (MTT) for predicting a motion distribution conditioned on language and trajectory. Once trained, we use MTT to sample initial motion predictions given user-specified partial trajectories and text descriptions as conditioning. Finally, we introduce a test-time optimization to refine these coarse predictions for precise trajectory control, which offers flexibility by allowing users to specify various optimization goals and ensures high runtime efficiency. Comprehensive experiments show that TLControl significantly outperforms the state-of-the-art in trajectory accuracy and time efficiency, making it practical for interactive and high-quality animation generation.},
journal={ECCV},
}
@InProceedings{desilva23prospective,
title = {Prospective Learning: Principled Extrapolation to the Future},
author = {De Silva, Ashwin and Ramesh, Rahul and Ungar, Lyle and Shuler, Marshall Hussain and Cowan, Noah J. and Platt, Michael and Li, Chen and Isik, Leyla and Roh, Seung-Eon and Charles, Adam and Venkataraman, Archana and Caffo, Brian and How, Javier J. and Kebschull, Justus M and Krakauer, John W. and Bichuch, Maxim and Kinfu, Kaleab Alemayehu and Yezerets, Eva and Jayaraman, Dinesh and Shin, Jong M. and Villar, Soledad and Phillips, Ian and Priebe, Carey E. and Hartung, Thomas and Miller, Michael I. and Dey, Jayanta and Huang, Ningyuan and Eaton, Eric and Etienne-Cummings, Ralph and Ogburn, Elizabeth L. and Burns, Randal and Osuagwu, Onyema and Mensh, Brett and Muotri, Alysson R. and Brown, Julia and White, Chris and Yang, Weiwei and Rusu, Andrei A. and Verstynen, Timothy and Kording, Konrad P. and Chaudhari, Pratik and Vogelstein, Joshua T.},
booktitle = {Proceedings of The 2nd Conference on Lifelong Learning Agents},
year = {2023},
publisher = {PMLR},
pdf = {https://proceedings.mlr.press/v232/de-silva23a/de-silva23a.pdf},
url = {https://proceedings.mlr.press/v232/de-silva23a.html},
abstract = {Learning is a process which can update decision rules, based on past experience, such that future performance improves. Traditionally, machine learning is often evaluated under the assumption that the future will be identical to the past in distribution or change adversarially. But these assumptions can be either too optimistic or pessimistic for many problems in the real world. Real world scenarios evolve over multiple spatiotemporal scales with partially predictable dynamics. Here we reformulate the learning problem to one that centers around this idea of dynamic futures that are partially learnable. We conjecture that certain sequences of tasks are not retrospectively learnable (in which the data distribution is fixed), but are prospectively learnable (in which distributions may be dynamic), suggesting that prospective learning is more difficult in kind than retrospective learning. We argue that prospective learning more accurately characterizes many real world problems that (1) currently stymie existing artificial intelligence solutions and/or (2) lack adequate explanations for how natural intelligences solve them. Thus, studying prospective learning will lead to deeper insights and solutions to currently vexing challenges in both natural and artificial intelligences.}
}
@article{kim2023im2contact,
title={Vision-Based Contact Localization Without Touch or Force Sensing},
author={Leon Kim and Yunshuang Li and Michael Posa and Dinesh Jayaraman},
journal={CORL},
year={2023}
}
@article{ma2023liv,
title = "{LIV}: Language-Image Representations and Rewards for Robotic Control",
author = "Yecheng Jason Ma and Vikash Kumar and Amy Zhang and Osbert Bastani and Dinesh Jayaraman",
journal= {ICML},
year = {2023},
}
@article{jia2024learning,
title={Learning a Meta-Controller for Dynamic Grasping},
author={Jia, Yinsen and Xu, Jingxi and Jayaraman, Dinesh and Song, Shuran},
abstract={Grasping moving objects is a challenging task that requires multiple submodules such as object pose predictor, arm motion planner, etc. Each submodule operates under its own set of meta-parameters. For example, how far the pose predictor should look into the future (i.e., \textit{look-ahead time}) and the maximum amount of time the motion planner can spend planning a motion (i.e., \textit{time budget}). Many previous works assign fixed values to these parameters; however, at different moments \textit{within} a single episode of dynamic grasping, the optimal values should vary depending on the current scene. In this work, we propose a dynamic grasping pipeline with a meta-controller that controls the look-ahead time and time budget dynamically. We learn the meta-controller through reinforcement learning with a sparse reward. Our experiments show the meta-controller improves the grasping success rate (up to 28\% in the most cluttered environment) and reduces grasping time, compared to the strongest baseline. Our meta-controller learns to reason about the reachable workspace and maintain the predicted pose within the reachable region. In addition, it assigns a small but sufficient time budget for the motion planner. Our method can handle different objects, trajectories, and obstacles. Despite being trained only with 3-6 random cuboidal obstacles, our meta-controller generalizes well to 7-9 obstacles and more realistic out-of-domain household setups with unseen obstacle shapes.},
journal={CASE},
year={2024}
}
@article{hu2023peg,
title = "Planning Goals for Exploration",
author = "Edward Hu and Richard Chang and Oleh Rybkin and Dinesh Jayaraman",
journal= {ICLR (top 25 percent) and Best Workshop Paper at CORL 2022 Robot Adaptation Workshop},
year = 2023,
}
@article{kausik2023tom,
title = "Learning Policy-Aware Models for Model-Based Reinforcement Learning via Transition Occupancy Matching",
author = "Yecheng Jason Ma and Kausik Sivakumar and Jason Yen and Osbert Bastani and Dinesh Jayaraman",
journal = {L4DC},
year = 2023,
}
@article{ma2023vip,
title = "{VIP}: Towards Universal Visual Reward and Representation
via {Value-Implicit} {Pre-Training}",
author = "Ma, Yecheng Jason and Sodhani, Shagun and Jayaraman, Dinesh
and Bastani, Osbert and Kumar, Vikash and Zhang, Amy",
journal= {ICLR (top 25 percent)},
year = 2023,
}
@article{huang2022lirf,
title={Training Robots to Evaluate Robots: Example-Based Interactive Reward Functions for Policy Learning},
author={Kun Huang and Edward Hu and Dinesh Jayaraman},
year={2022},
journal= {CORL}
}
@article{qian2022dkp,
title={Discovering Deformable Keypoint Pyramids},
author={Jianing Qian and Anastasios Panagopoulos and Dinesh Jayaraman},
year={2022},
journal= {ECCV}
}
@article{ma2022far,
title={How Far I'll Go: Offline Goal-Conditioned Reinforcement Learning via $f$-Advantage Regression},
author={Ma, Yecheng Jason and Yan, Jason and Jayaraman, Dinesh and Bastani, Osbert},
journal={NeurIPS},
year={2022}
}
@article{wen2022priming,
title={Fighting Fire with Fire: Avoiding DNN Shortcuts through Priming},
author={Chuan Wen and Jianing Qian and Jierui Lin and Jiaye Teng and Dinesh Jayaraman and Yang Gao},
year={2022},
journal= {ICML}
}
@article{ma2022smodice,
title={SMODICE: Versatile Offline Imitation Learning via State Occupancy Matching},
author={Yecheng Jason Ma and Andrew Shen and Dinesh Jayaraman and Osbert Bastani},
year={2022},
journal= {ICML}
}
@ARTICLE{Vogelstein2022-mn,
title = "Prospective Learning: Back to the Future",
author = "Vogelstein, Joshua T and Verstynen, Timothy and Kording,
Konrad P and Isik, Leyla and Krakauer, John W and
Etienne-Cummings, Ralph and Ogburn, Elizabeth L and Priebe,
Carey E and Burns, Randal and Kutten, Kwame and Knierim,
James J and Potash, James B and Hartung, Thomas and
Smirnova, Lena and Worley, Paul and Savonenko, Alena and
Phillips, Ian and Miller, Michael I and Vidal, Rene and
Sulam, Jeremias and Charles, Adam and Cowan, Noah J and
Bichuch, Maxim and Venkataraman, Archana and Li, Chen and
Thakor, Nitish and Kebschull, Justus M and Albert, Marilyn
and Xu, Jinchong and Shuler, Marshall Hussain and Caffo,
Brian and Ratnanather, Tilak and Geisa, Ali and Roh,
Seung-Eon and Yezerets, Eva and Madhyastha, Meghana and How,
Javier J and Tomita, Tyler M and Dey, Jayanta and Huang, Ningyuan
and Shin, Jong M and Kinfu, Kaleab Alemayehu and
Chaudhari, Pratik and Baker, Ben and Schapiro, Anna and
Jayaraman, Dinesh and Eaton, Eric and Platt, Michael and
Ungar, Lyle and Wehbe, Leila and Kepecs, Adam and
Christensen, Amy and Osuagwu, Onyema and Brunton, Bing and
Mensh, Brett and Muotri, Alysson R and Silva, Gabriel and
Puppo, Francesca and Engert, Florian and Hillman, Elizabeth
and Brown, Julia and White, Chris and Yang, Weiwei",
month = jan,
year = 2022,
archivePrefix = "arXiv",
primaryClass = "cs.LG",
eprint = "2201.07372"
}
@article{ma2022cap, title= {Conservative and Adaptive Penalty for Model-Based Safe Reinforcement Learning}, author= {Ma, Yecheng Jason and Shen, Andrew and Bastani, Osbert and Jayaraman, Dinesh}, journal= {AAAI}, year= {2022}}
@article{hu2022rac, author = {Edward S. Hu and Kun Huang and Oleh Rybkin and Dinesh Jayaraman}, journal = {ICLR}, title = {Know Thyself: Transferable Visuomotor Control Through Robot-Awareness}, year = {2022}}
@article{ma2021conservative, title= {Conservative Offline Distributional Reinforcement Learning}, author= {Ma, Yecheng Jason and {Jayaraman}, {Dinesh} and Bastani, Osbert}, journal= {NeurIPS}, year= {2021}}
@article{kolotouros2021embracing, title= {Embracing the Reconstruction Uncertainty in 3D Human Pose Estimation}, author= {Kolotouros, Nikos and Pavlakos, Georgios and {Jayaraman}, {Dinesh} and Daniilidis, Kostas}, journal= {ICCV}, year= {2021}}
% - comments @article{ma2021uncertainty, title= {What Matters More and When: Epistemic or Aleatoric Uncertainty?}, author= {Ma, Yecheng and Moore, Juston and Pleiss, Geoff and {Jayaraman}, {Dinesh} and Gardner, Jacob}, journal= {(under review)}, year= {2021}}
% - comments @article{lee2021perimeter, title= {Vision-Based Perimeter Defense Via Multi-View Active Pose Estimation}, author= {Lee, Elijah and Loianno, Giuseppe and {Jayaraman}, {Dinesh} and Kumar, Vijay}, journal= {(under review)}, year= {2021}}
@article{qian2021flood, title= {Object Representations Guided By Optical Flow}, author= {Qian, Jianing and {Jayaraman}, {Dinesh}}, journal= {NeurIPS 4th Robot Learning Workshop: Self-Supervised and Lifelong Learning}, year= {2021}}
@article{wen2021keyframe, title= {Keyframe-focused visual imitation learning}, author= {Wen, Chuan and Lin, Jierui and Qian, Jianing and Gao, Yang and {Jayaraman}, {Dinesh}}, journal= {ICML}, year= {2021}}
@article{xu2021limits, title={How Are Learned Perception-Based Controllers Impacted by the Limits of Robust Control?},journal={L4DC}, author={Jingxi Xu and Bruce Lee and Nikolai Matni and {Dinesh} {Jayaraman}}, year={2021}}
@article{ramakrishnan2021exploration, author = {Ramakrishnan, Santhosh K and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal = {IJCV}, title = {An exploration of embodied visual exploration}, year = {2021}}
@article{berseth2021smirl, author={Glen Berseth and Daniel Geng and Coline Devin and Chelsea Finn and {Dinesh} {Jayaraman} and Sergey Levine}, title={{SMiRL}: Surprise Minimizing RL in Dynamic Environments}, year = {2021}, journal ={ICLR}}
@inproceedings{chen2021covid, author= {Chen, Hui and Li, Zhao and Feng, Sheng and Wang, Anni and Richard-Greenblatt, Melissa and Hutson, Emily and Andrianus, Stefen and Glaser, Laurel J. and Rodino, Kyle G. and Qian, Jianing and {Jayaraman}, {Dinesh} and Collman, Ronald G. and Glascock, Abigail and Bushman, Frederic D. and Lee, Jae Seung and Cherry, Sara and Fausto, Alejandra and Weiss, Susan R. and Koo, Hyun and Corby, Patricia M. and O{\textquoteright}Doherty, Una and Garfall, Alfred L. and Vogl, Dan T. and Stadtmauer, Edward A. and Wang, Ping}, title= {Femtomolar SARS-CoV-2 Antigen Detection Using the Microbubbling Digital Assay with Smartphone Readout Enables Antigen Burden Quantitation and Dynamics Tracking}, elocation-id= {2021.03.17.21253847}, year= {2021}, doi= {10.1101/2021.03.17.21253847}, publisher= {Cold Spring Harbor Laboratory Press}, abstract= {Background Little is known about the dynamics of SARS-CoV-2 antigen burden in respiratory samples in different patient populations at different stages of infection. Current rapid antigen tests cannot quantitate and track antigen dynamics with high sensitivity and specificity in respiratory samples.Methods We developed and validated an ultra-sensitive SARS-CoV-2 antigen assay with smartphone readout using the Microbubbling Digital Assay previously developed by our group, which is a platform that enables highly sensitive detection and quantitation of protein biomarkers. A computer vision-based algorithm was developed for microbubble smartphone image recognition and quantitation. A machine learning-based classifier was developed to classify the smartphone images based on detected microbubbles. Using this assay, we tracked antigen dynamics in serial swab samples from COVID patients hospitalized in ICU and immunocompromised COVID patients.Results The limit of detection (LOD) of the Microbubbling SARS-CoV-2 Antigen Assay was 0.5 pg/mL (10.6 fM) recombinant nucleocapsid (N) antigen or 4000 copies/mL inactivated SARS-CoV-2 virus in nasopharyngeal (NP) swabs, comparable to many rRT-PCR methods. The assay had high analytical specificity towards SARS-CoV-2. Compared to EUA-approved rRT-PCR methods, the Microbubbling Antigen Assay demonstrated a positive percent agreement (PPA) of 97\% (95\% confidence interval (CI), 92-99\%) in symptomatic individuals within 7 days of symptom onset and positive SARS-CoV-2 nucleic acid results, and a negative percent agreement (NPA) of 97\% (95\% CI, 94-100\%) in symptomatic and asymptomatic individuals with negative nucleic acid results. Antigen positivity rate in NP swabs gradually decreased as days-after-symptom-onset increased, despite persistent nucleic acid positivity of the same samples. The computer vision and machine learning-based automatic microbubble image classifier could accurately identify positives and negatives, based on microbubble counts and sizes. Total microbubble volume, a potential marker of antigen burden, correlated inversely with Ct values and days-after-symptom-onset. Antigen was detected for longer periods of time in immunocompromised patients with hematologic malignancies, compared to immunocompetent individuals. Simultaneous detectable antigens and nucleic acids may indicate the presence of replicating viruses in patients with persistent infections.Conclusions The Microbubbling SARS-CoV-2 Antigen Assay enables sensitive and specific detection of acute infections, and quantitation and tracking of antigen dynamics in different patient populations at various stages of infection. 
With smartphone compatibility and automated image processing, the assay is well-positioned to be adapted for point-of-care diagnosis and to explore the clinical implications of antigen dynamics in future studies.}, URL= {https://www.medrxiv.org/content/early/2021/03/26/2021.03.17.21253847}, eprint= {https://www.medrxiv.org/content/early/2021/03/26/2021.03.17.21253847.full.pdf}, journal= {medRxiv}}
@article{das2020keypointirl, author = {Neha Das and Sarah Bechtle and Todor Davchev and {Dinesh} {Jayaraman} and Akshara Rai and Franziska Meier}, journal = {CORL}, title = {Model-Based Inverse Reinforcement Learning from Visual Demonstrations}, year = {2020}}
@article{zhang2020cautious, title= {Cautious adaptation for reinforcement learning in safety-critical settings}, author= {Zhang, Jesse and Cheung, Brian and Finn, Chelsea and Levine, Sergey and {Jayaraman}, {Dinesh}}, journal= {ICML}, year= {2020}}
@article{lambeta2020digit, title= {Digit: A novel design for a low-cost compact high-resolution tactile sensor with application to in-hand manipulation}, author= {Lambeta, Mike and Chou, Po-Wei and Tian, Stephen and Yang, Brian and Maloon, Benjamin and Most, Victoria Rose and Stroud, Dave and Santos, Raymond and Byagowi, Ahmad and Kammerer, Gregg and {Jayaraman}, {Dinesh} and Calandra, Roberto}, journal= {ICRA and IEEE RA-L}, year= {2020}}
@article{ma2021diverse, title= {Likelihood-Based Diverse Sampling for Trajectory Forecasting}, author= {Ma, Yecheng Jason and Inala, Jeevana Priya and {Jayaraman}, {Dinesh} and Bastani, Osbert}, journal= {ICCV}, year= {2021}}
@article{wen2020copycat, title= {Fighting Copycat Agents in Behavioral Cloning from Observation Histories}, author= {Wen, Chuan and Lin, Jierui and Darrell, Trevor and {Jayaraman}, {Dinesh} and Gao, Yang}, journal= {NeurIPS}, year= {2020}}
@article{pertsch2020long, title= {Long-horizon visual planning with goal-conditioned hierarchical predictors}, author= {Pertsch, Karl and Rybkin, Oleh and Ebert, Frederik and {Jayaraman}, {Dinesh} and Finn, Chelsea and Levine, Sergey}, journal= {NeurIPS}, year= {2020}}
@article{yang2020mavric, title={{MAVRIC}: Morphology-Agnostic Visual Robotic Control}, author={Yang*, Brian and {Jayaraman}*, {Dinesh} and Berseth, Glen and Efros, Alexei and Levine, Sergey}, journal={ICRA and IEEE RA-L}, year={2020}}
@article{de2019causal, title= {Causal Confusion in Imitation Learning}, author= {de Haan, Pim and {Jayaraman}, {Dinesh} and Levine, Sergey}, journal= {NeurIPS}, year= {2019}}
@article{ramakrishnan2019emergence, title= {Emergence of exploratory look-around behaviors through active observation completion}, author= {Ramakrishnan*, Santhosh K and {Jayaraman}*, {Dinesh} and Grauman, Kristen}, journal= {Science Robotics}, year= {2019}}
@article{tian2019manipulation, title= {Manipulation by feel: Touch-based control with deep predictive models}, author= {Tian, Stephen and Ebert, Frederik and {Jayaraman}, {Dinesh} and Mudigonda, Mayur and Finn, Chelsea and Calandra, Roberto and Levine, Sergey}, journal= {ICRA}, year= {2019}}
@article{yang2019replab, title= {REPLAB: A reproducible low-cost arm benchmark for robotic learning}, author= {Yang, Brian and {Jayaraman}, {Dinesh} and Zhang, Jesse and Levine, Sergey}, journal= {ICRA}, year={2019}}
@article{jayaraman2019time, title= {Time-agnostic prediction: Predicting predictable video frames}, author= {{Jayaraman}, {Dinesh} and Ebert, Frederik and Efros, Alexei A and Levine, Sergey}, journal= {ICLR}, year= {2019}}
@misc{ma2018techniques, title= {Techniques for rectification of camera arrays}, author= {Ma, Tao and Sun, Wei and Nestares, Oscar and Seshadrinathan, Kalpana and {Jayaraman}, {Dinesh}}, year= {2018}, month= {jan~23}, note= {US Patent 9,875,543}}
@article{jayaraman2018end, title= {End-to-end policy learning for active visual categorization}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {IEEE TPAMI}, year= {2018}}
@article{jayaraman2018learning, title= {Learning to look around: Intelligently exploring unseen environments for unknown tasks}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {CVPR}, year= {2018}}
@article{calandra2018more, title= {More Than a Feeling: Learning to Grasp and Regrasp using Vision and Touch}, author= {Calandra, Roberto and Owens, Andrew and {Jayaraman}, {Dinesh} and Lin, Justin and Yuan, Wenzhen and Malik, Jitendra and Adelson, Edward H and Levine, Sergey}, journal= {IROS and IEEE RA-L}, year= {2018}}
@article{jayaraman2018shapecodes, title= {Shapecodes: self-supervised feature learning by lifting views to viewgrids}, author= {{Jayaraman}, {Dinesh} and Gao, Ruohan and Grauman, Kristen}, journal= {ECCV}, year= {2018}}
@misc{nestares2017techniques, title= {Techniques for improved focusing of camera arrays}, author= {Nestares, Oscar and Seshadrinathan, Kalpana and {Jayaraman}, {Dinesh}}, year= {2017}, month= {aug~22}, note= {US Patent 9,743,016}}
@incollection{chen2017divide, title= {Divide, share, and conquer: Multi-task attribute learning with selective sharing}, author= {{Jayaraman}, {{Dinesh}} and Chen, Chao-Yeh and Sha, Fei and Grauman, Kristen}, booktitle= {Visual attributes}, pages= {49--85}, year= {2017}, publisher= {Springer, Cham}}
@phdthesis{jayaraman2017embodied, title= {Embodied learning for visual recognition}, author= {{Jayaraman}, {Dinesh}}, year= {2017}, school= {UT Austin}}
@article{jayaraman2017learning, title= {Learning Image Representations Tied to Egomotion from Unlabeled Video}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {IJCV Special Issue of Best Papers from ICCV 2015}, year= {2017}}
@article{jayaraman2016look, title= {Look-ahead before you leap: end-to-end active recognition by forecasting the effect of motion}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ECCV}, year= {2016}}
@article{gao2016object, title= {Object-Centric Representation Learning from Unlabeled Videos}, author= {Gao, Ruohan and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ACCV}, year= {2016}}
@article{su2016pano2vid, title= {Pano2Vid: Automatic cinematography for watching 360-degree videos}, author= {Su, Yu-Chuan and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ACCV}, year= {2016}}
@article{jayaraman2016slow, title= {Slow and steady feature analysis: higher order temporal coherence in video}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {CVPR}, year= {2016}}
@article{jayaraman2015learning, title= {Learning image representations tied to ego-motion}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ICCV}, year= {2015}}
@article{jayaraman2014decorrelating, title= {Decorrelating semantic visual attributes by resisting the urge to share}, author= {{Jayaraman}, {Dinesh} and Sha, Fei and Grauman, Kristen}, journal={CVPR}, year= {2014}}
@article{jayaraman2014zero, title= {Zero-shot recognition with unreliable attributes}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {NeurIPS}, year= {2014}}
@article{jayaraman2012objective, title= {Objective quality assessment of multiply distorted images}, author= {{Jayaraman}, {Dinesh} and Mittal, Anish and Moorthy, Anush K and Bovik, Alan C}, journal= {ASILOMAR Signals, Systems and Computers}, year= {2012}}