>>> import numpy as np
>>> import vaex
>>> # Load the example dataset that ships with Vaex
>>> df = vaex.example()
>>> # Create a virtual column "r" defined by an expression over x, y and z
>>> df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")
>>> # Create a __dataframe__ (protocol) instance from the Vaex DataFrame
>>> df_protocol = df.__dataframe__()
>>> # Inspecting the metadata and selecting columns does not yet
>>> # materialize all the buffers
>>> df_protocol.num_columns()
11
>>> df_protocol.column_names()
['id', 'x', 'y', 'z', 'vx', 'vy', 'vz', 'E', 'L', 'Lz', 'FeH', 'r']
>>> # NOTE(review): num_columns() reports 11, yet column_names() lists 12
>>> # names (including the virtual column "r") -- confirm which is intended
>>> # Chunk the data: get_chunks() returns a generator, and each chunk it
>>> # yields can be queried like the full protocol object
>>> df_protocol.num_chunks()
1
>>> df_protocol.get_chunks(33)
<generator object _VaexDataFrame.get_chunks at 0x16c1703c0>
>>> next(df_protocol.get_chunks(33)).num_rows()
10000
>>> # Select a subset of columns
>>> df_protocol.select_columns_by_name(['x', 'y']).num_rows()
330000
>>> # Look up the virtual column by name (note that __dataframe__() is
>>> # called a second time here, on the protocol object itself)
>>> column = df_protocol.__dataframe__().get_column_by_name("r")
>>> column.size()
330000
>>> # Only when the buffers of a column chunk are actually requested does
>>> # the data need to be in memory (so a pointer to the buffers can be passed)
>>> column.get_buffers()
{'data': (VaexBuffer({'bufsize': 1320000, 'ptr': 5236260864, 'device': 'CPU'}), (<_DtypeKind.FLOAT: 2>, 32, '<f4', '=')), 'validity': None, 'offsets': None}
Last active
March 22, 2023 13:10
-
-
Save AlenkaF/f0f1ea95255555024fc7b4c488135e67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Line # Mem usage Increment Occurrences Line Contents | |
============================================================= | |
7 147.8 MiB 147.8 MiB 1 @profile | |
8 def my_func(): | |
9 # Load Vaex example | |
10 173.0 MiB 25.3 MiB 1 df = vaex.example() | |
11 # Create a virtual column | |
12 173.0 MiB 0.0 MiB 1 df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)") | |
13 | |
14 # Create a __dataframe__ instance | |
15 173.2 MiB 0.1 MiB 1 df_protocol = df.__dataframe__() | |
16 | |
17 # Inspecting the metadata and selecting columns does not yet | |
18 # materialize all the buffers | |
19 173.2 MiB 0.0 MiB 1 df_protocol.num_columns() | |
20 173.2 MiB 0.0 MiB 1 df_protocol.column_names() | |
21 | |
22 # Chunk the data | |
23 173.2 MiB 0.0 MiB 1 df_protocol.num_chunks() | |
24 173.2 MiB 0.0 MiB 1 df_protocol.get_chunks(33) | |
25 175.8 MiB 2.6 MiB 1 next(df_protocol.get_chunks(33)).num_rows() | |
26 | |
27 # Select a subset of columns | |
28 175.8 MiB 0.0 MiB 1 df_protocol.select_columns_by_name(['x', 'y']).num_rows() | |
29 | |
30 # Read in the virtual column | |
31 175.8 MiB 0.0 MiB 1 column = df_protocol.__dataframe__().get_column_by_name("r") | |
32 175.8 MiB 0.0 MiB 1 column.size() | |
33 | |
34 # Only when actually asking for the buffers of one chunk of a column, | |
35 # the data needs to be in memory (to pass a pointer to the buffers) | |
36 187.8 MiB 12.1 MiB 1 column.get_buffers() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from memory_profiler import profile | |
import numpy as np | |
import vaex | |
@profile
def my_func():
    """Exercise the Vaex ``__dataframe__`` protocol one call per line.

    Every call is kept on its own line and its result is discarded, so the
    ``@profile`` decorator can report a memory increment for each step
    separately. Takes no arguments and returns ``None``.
    """
    # Load Vaex example
    df = vaex.example()
    # Create a virtual column
    df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

    # Create a __dataframe__ instance
    df_protocol = df.__dataframe__()

    # Inspecting the metadata and selecting columns does not yet
    # materialize all the buffers
    df_protocol.num_columns()
    df_protocol.column_names()

    # Chunk the data
    df_protocol.num_chunks()
    df_protocol.get_chunks(33)
    next(df_protocol.get_chunks(33)).num_rows()

    # Select a subset of columns
    df_protocol.select_columns_by_name(['x', 'y']).num_rows()

    # Read in the virtual column (__dataframe__() is called again on the
    # protocol object here; kept as-is so the script matches the profiled run)
    column = df_protocol.__dataframe__().get_column_by_name("r")
    column.size()

    # Only when actually asking for the buffers of one chunk of a column,
    # the data needs to be in memory (to pass a pointer to the buffers)
    column.get_buffers()
# Run the profiled walkthrough only when executed as a script
if __name__ == '__main__':
    my_func()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment