Skip to content

Instantly share code, notes, and snippets.

@bojanpotocnik
Created May 14, 2019 09:41
Show Gist options
  • Save bojanpotocnik/1e275a68c8c9421a0100140d5c5076ee to your computer and use it in GitHub Desktop.
Save bojanpotocnik/1e275a68c8c9421a0100140d5c5076ee to your computer and use it in GitHub Desktop.
Initialize rpy2 in Python, imported in the file where r("") will be used
import os
import sys
from collections import OrderedDict
from typing import Any, Union
# NumPy and pandas are assumed to be installed, since anyone using R from Python for data work needs them.
import numpy as np
import pandas as pd
# region Helper classes and functions
class WorkingDirectoryChanger:
    """
    Context manager which temporarily changes the working directory while R code is executed.

    Some R scripts are written as such that they expect the working directory to be their directory or
    they change the working directory during the execution. However when using rpy2, the R working directory
    is also Python working directory. Therefore to prevent buggy behaviour, this context manager can be used
    to "wrap the execution" of the R code in means of changing the working directory.

    The working directory is changed to the requested one in __enter__ and restored in __exit__ to the
    original one, which was saved in __enter__. The original directory is restored even if the optional
    R workspace cleanup performed in __exit__ fails.
    """

    def __init__(self, new_wd_path: str = None,
                 clear_workspace_on_exit: bool = True, garbage_collect_on_exit: bool = True) -> None:
        """
        :param new_wd_path: Path to directory to which the working directory shall be changed. If None (or
                            empty) then the directory of the current Python file (__file__) will be used.
        :param clear_workspace_on_exit: Whether to clear all objects (except functions) in __exit__.
                                        This is useful to prevent residual objects consuming memory or
                                        old values producing strange bugs.
        :param garbage_collect_on_exit: Whether to run the R GC in __exit__. Useful to prevent excess
                                        memory consumption.
        """
        # noinspection PyTypeChecker
        self.original: str = None  # Initialized in __enter__
        # dirname() will return empty string if already in working dir
        self.new_wd_path: str = new_wd_path or os.path.dirname(__file__) or "."
        self.clear_workspace_on_exit = clear_workspace_on_exit
        self.garbage_collect_on_exit = garbage_collect_on_exit

    def __enter__(self) -> "WorkingDirectoryChanger":
        """
        Remember the current working directory and switch to the configured one.

        :return: This instance, so that `with WorkingDirectoryChanger(...) as wd:` is usable.
        """
        self.original = os.getcwd()
        os.chdir(self.new_wd_path)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """
        Optionally clean up the R workspace, then restore the original working directory.

        Exceptions raised inside the `with` body are not suppressed.
        """
        try:
            if self.clear_workspace_on_exit:
                # Remove all objects in the current working environment - except for functions.
                # https://stackoverflow.com/a/8305850/5616255
                r("rm(list = setdiff(ls(all=TRUE), lsf.str()))")
            if self.garbage_collect_on_exit:
                r("gc()")  # Trigger garbage collection
        finally:
            # Always go back, otherwise a failing cleanup would leave the whole process in the wrong directory.
            os.chdir(self.original)
def _check_rpy2_module_installation() -> None:
try:
# noinspection PyUnresolvedReferences
import rpy2
except ModuleNotFoundError:
if os.name == 'nt':
# Windows
py_ver = f"cp{sys.version_info.major}{sys.version_info.minor}"
rpy2_whl_fn = f"rpy2‑_._._‑{py_ver}{py_ver}m‑{'win_amd64' if (sys.maxsize > 2 ** 32) else 'win32'}.whl"
raise ModuleNotFoundError(f"Please download {rpy2_whl_fn} rpy2 Windows binary"
f" from https://www.lfd.uci.edu/~gohlke/pythonlibs/#rpy2 ) and install it using"
f" 'pip install {rpy2_whl_fn}'") from None
else:
raise ModuleNotFoundError(f"Please install rpy2 using 'pip install rpy2'") from None
def _check_rpy2_required_env_variables() -> None:
    """
    Make sure the R_HOME and R_USER settings required by rpy2 are available.

    R_HOME is only checked (via rpy2.situation.get_r_home()). R_USER is set to the rpy2 installation
    directory if importing rpy2.robjects fails because it is not defined.

    :raises EnvironmentError: If rpy2 cannot determine the R home directory.
    :raises RuntimeError: Any RuntimeError raised while importing rpy2.robjects which is not about
                          the missing R_USER variable is propagated unchanged.
    """
    # R_HOME and R_USER variables are required by the rpy2
    import rpy2.situation

    # R_HOME is provided by the R interpreter by executing `R RHOME`, but that is not possible if not in PATH.
    if not rpy2.situation.get_r_home():
        raise EnvironmentError("Please add directory containing R executable (R installation directory/bin) to the PATH"
                               " environment variable so that `R RHOME` can be executed from the terminal, or set the"
                               " R_HOME environment variable to the R installation directory (that is directory containing"
                               " bin/R) - this directory is returned by executing `R RHOME` command.")

    # R_USER shall be provided by the user, but in this case set to the rpy2 installation directory.
    try:
        import rpy2.robjects  # noqa: F401
    except RuntimeError as e:
        # Inspect the whole message instead of e.args[0], which fails on an empty or non-string args tuple
        # and would then hide the original error.
        if "R_USER not defined" not in str(e):
            raise
        os.environ["R_USER"] = os.path.dirname(rpy2.__file__)
        # import rpy2.robjects shall now work, import it later when required.
# endregion Helper classes and functions
# Run the sanity checks at import time, so that a missing rpy2 package or a missing R environment
# is reported with the hints from the helper functions before the rpy2 imports below are attempted.
_check_rpy2_module_installation()
_check_rpy2_required_env_variables()
# rpy2.robjects must not be imported before calling _check_rpy2_required_env_variables()!
# (That function sets R_USER when the first import attempt complains that it is not defined.)
from rpy2.robjects import r # noqa (suppress "PEP 8: Module level import not at top of file")
import rpy2.rinterface as ri # noqa (suppress "PEP 8: Module level import not at top of file")
import rpy2.robjects as ro # noqa (suppress "PEP 8: Module level import not at top of file")
def rpy2py(r_object: Any) -> Union[None, Any,
                                   np.ndarray, pd.DataFrame, pd.Series,
                                   ro.Environment]:  # Any as any built-in type.
    """
    Translate an R (rpy2) object into a plain Python / Numpy / Pandas object.

    Containers are converted recursively. Objects of a type which is not handled are returned
    unchanged, after a note was printed to stderr.

    :param r_object: Object to convert recursively.
    :return: Converted object.
    """
    # https://stackoverflow.com/questions/24152160/converting-an-rpy2-listvector-to-a-python-dictionary
    kind = type(r_object)

    def is_exactly(*classes: type) -> bool:
        # Exact type identity on purpose (no isinstance()): many rpy2 classes are subclasses of
        # others, but require different conversion methods.
        return any(kind is candidate for candidate in classes)

    # Built-in Python values need no conversion.
    if kind.__module__ == "builtins":
        return r_object

    # Unnamed numeric/logical vectors become a Numpy array. Named ones are not handled yet
    # (pd.Series shall probably be generated) and fall through to the checks below.
    if is_exactly(ro.FloatVector, ro.IntVector, ro.BoolVector) and r_object.names == ri.NULL:
        return np.array([rpy2py(item) for item in r_object])

    if is_exactly(ro.DataFrame, ro.ListVector):
        if r_object.names:
            # Named list: convert every value recursively, keeping the order.
            return OrderedDict((name, rpy2py(value)) for name, value in zip(r_object.names, r_object))
        # Unnamed list: just the values, or None if there are none.
        return np.array(r_object) if len(r_object) else None

    # Missing values of the various R types (float, integer, string, boolean, complex)
    # and the R NULL singleton all map to None.
    if is_exactly(ri.NARealType, ri.NAIntegerType, ri.NACharacterType,
                  ri.NALogicalType, ri.NAComplexType, ri.RNULLType):
        return None

    if kind is ro.FactorVector:
        if r_object.names:
            # Named factor: convert every value recursively, keeping the order.
            return OrderedDict((name, rpy2py(value)) for name, value in zip(r_object.names, r_object))
        # Unnamed factor: array of the labels, or None if it is empty.
        return np.array(list(r_object.iter_labels())) if len(r_object) else None

    # Already converted objects and known, not yet supported types are passed through silently.
    if is_exactly(np.ndarray, pd.DataFrame, pd.Series) or kind is ro.Environment:
        return r_object

    print(f"Non converted type {type(r_object)}", file=sys.stderr)
    return r_object
# Disable R GUI and graphic windows
# The three statements are executed in R, in this order, when this module is imported:
# open a pdf device with `file = NULL`, close the device again, and set the `device` option to NULL.
# NOTE(review): presumably `file = NULL` makes the pdf device write no file - confirm against the R docs.
r("pdf(file = NULL)")
r("dev.off()")
r("options(device=NULL)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment