AlenkaF · March 22, 2023 13:10
diff --git a/profiler_ouput.txt b/profiler_ouput.txt
 Line #    Mem usage    Increment  Occurrences   Line Contents
 =============================================================
     7    147.8 MiB    147.8 MiB           1   @profile
     8                                         def my_func():
     9                                             # Load Vaex example
    10    173.0 MiB     25.3 MiB           1       df = vaex.example()
    11                                             # Create a virtual column
    12    173.0 MiB      0.0 MiB           1       df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")
    13                                         
    14                                             # Create a __dataframe__ instance
    15    173.2 MiB      0.1 MiB           1       df_protocol = df.__dataframe__()
    16                                         
    17                                             # Inspecting the metadata and selecting columns does not yet
    18                                             # materialize all the buffers
    19    173.2 MiB      0.0 MiB           1       df_protocol.num_columns()
    20    173.2 MiB      0.0 MiB           1       df_protocol.column_names()
    21                                         
    22                                             # Chunk the data
    23    173.2 MiB      0.0 MiB           1       df_protocol.num_chunks()
    24    173.2 MiB      0.0 MiB           1       df_protocol.get_chunks(33)
    25    175.8 MiB      2.6 MiB           1       next(df_protocol.get_chunks(33)).num_rows()
    26                                         
    27                                             # Select a subset of columns
    28    175.8 MiB      0.0 MiB           1       df_protocol.select_columns_by_name(['x', 'y']).num_rows()
    29                                         
    30                                             # Read in the virtual column
    31    175.8 MiB      0.0 MiB           1       column = df_protocol.__dataframe__().get_column_by_name("r")
    32    175.8 MiB      0.0 MiB           1       column.size()
    33                                         
    34                                             # Only when actually asking for the buffers of one chunk of a column,
    35                                             # the data needs to be in memory (to pass a pointer to the buffers)
    36    187.8 MiB     12.1 MiB           1       column.get_buffers()
diff --git a/python_output.rst b/python_output.rst
diff --git a/script.py b/script.py
 from memory_profiler import profile

 import numpy as np
 import vaex


 @profile
 def my_func():
    """Step through the ``__dataframe__`` API of the Vaex example dataset.

    The function is decorated with ``memory_profiler``'s ``profile`` so that
    memory usage is reported line by line; each call is therefore kept on its
    own line. The results of the calls are discarded and nothing is returned.
    """
    # Load Vaex example
    df = vaex.example()
    # Create a virtual column
    df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

    # Create a __dataframe__ instance
    df_protocol = df.__dataframe__()

    # Inspecting the metadata and selecting columns does not yet
    # materialize all the buffers
    df_protocol.num_columns()
    df_protocol.column_names()

    # Chunk the data
    df_protocol.num_chunks()
    # The chunk iterator is created twice: once without consuming it, and
    # once consuming only its first chunk to ask for that chunk's row count.
    df_protocol.get_chunks(33)
    next(df_protocol.get_chunks(33)).num_rows()

    # Select a subset of columns
    df_protocol.select_columns_by_name(['x', 'y']).num_rows()

    # Read in the virtual column
    # NOTE(review): ``__dataframe__()`` is called again on the protocol object
    # itself here -- presumably it returns an equivalent object; confirm.
    column = df_protocol.__dataframe__().get_column_by_name("r")
    column.size()

    # Only when actually asking for the buffers of one chunk of a column,
    # the data needs to be in memory (to pass a pointer to the buffers)
    column.get_buffers()

 # Run the profiled walkthrough only when this file is executed as a script.
 if __name__ == '__main__':
    my_func()
	Line # Mem usage Increment Occurrences Line Contents
	=============================================================
	7 147.8 MiB 147.8 MiB 1 @profile
	8 def my_func():
	9 # Load Vaex example
	10 173.0 MiB 25.3 MiB 1 df = vaex.example()
	11 # Create a virtual column
	12 173.0 MiB 0.0 MiB 1 df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")
	13
	14 # Create a __dataframe__ instance
	15 173.2 MiB 0.1 MiB 1 df_protocol = df.__dataframe__()
	16
	17 # Inspecting the metadata and selecting columns does not yet
	18 # materialize all the buffers
	19 173.2 MiB 0.0 MiB 1 df_protocol.num_columns()
	20 173.2 MiB 0.0 MiB 1 df_protocol.column_names()
	21
	22 # Chunk the data
	23 173.2 MiB 0.0 MiB 1 df_protocol.num_chunks()
	24 173.2 MiB 0.0 MiB 1 df_protocol.get_chunks(33)
	25 175.8 MiB 2.6 MiB 1 next(df_protocol.get_chunks(33)).num_rows()
	26
	27 # Select a subset of columns
	28 175.8 MiB 0.0 MiB 1 df_protocol.select_columns_by_name(['x', 'y']).num_rows()
	29
	30 # Read in the virtual column
	31 175.8 MiB 0.0 MiB 1 column = df_protocol.__dataframe__().get_column_by_name("r")
	32 175.8 MiB 0.0 MiB 1 column.size()
	33
	34 # Only when actually asking for the buffers of one chunk of a column,
	35 # the data needs to be in memory (to pass a pointer to the buffers)
	36 187.8 MiB 12.1 MiB 1 column.get_buffers()
	from memory_profiler import profile

	import numpy as np
	import vaex


	@profile
	def my_func():
	    """Step through the ``__dataframe__`` API of the Vaex example dataset.

	    The function is decorated with ``memory_profiler``'s ``profile`` so that
	    memory usage is reported line by line; each call is therefore kept on
	    its own line. The results of the calls are discarded and nothing is
	    returned.
	    """
	    # Load Vaex example
	    df = vaex.example()
	    # Create a virtual column (the ``**`` power operators must be present;
	    # ``x2``/``y2`` are not columns of the dataset)
	    df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

	    # Create a __dataframe__ instance
	    df_protocol = df.__dataframe__()

	    # Inspecting the metadata and selecting columns does not yet
	    # materialize all the buffers
	    df_protocol.num_columns()
	    df_protocol.column_names()

	    # Chunk the data
	    df_protocol.num_chunks()
	    df_protocol.get_chunks(33)
	    next(df_protocol.get_chunks(33)).num_rows()

	    # Select a subset of columns
	    df_protocol.select_columns_by_name(['x', 'y']).num_rows()

	    # Read in the virtual column
	    column = df_protocol.__dataframe__().get_column_by_name("r")
	    column.size()

	    # Only when actually asking for the buffers of one chunk of a column,
	    # the data needs to be in memory (to pass a pointer to the buffers)
	    column.get_buffers()

	# Run the profiled walkthrough only when this file is executed as a script.
	if __name__ == '__main__':
	    my_func()