Generate a deterministic unique identifier for each row, derived from the row's values so that identical values always produce the same ID. The ID column is placed first in the DataFrame.
from pyspark.sql.functions import coalesce, col, concat_ws, lit, sha2
# Build a deterministic row ID: the SHA-256 hex digest of the source columns
# joined with "||", then move the ID column to the front of the DataFrame.
#
# An already-present `id_col` is excluded from the source columns: hashing a
# previous ID would make a re-run produce a different ID for the same row,
# and selecting `[id_col] + columns` would emit that column twice.
columns = [c for c in df.columns if c != id_col]

# concat_ws() skips NULL inputs, so ("a", NULL, "b") and ("a", "b", NULL)
# would otherwise concatenate to the same string and share an ID. Casting to
# string and substituting an explicit marker keeps every column's position
# visible in the hashed text.
# NOTE(review): for non-NULL scalar columns the hashed text should match what
# the previous version produced (concat_ws casts inputs to string) — confirm
# before relying on IDs generated earlier; array columns will differ.
# NOTE(review): a value that contains "||" or equals the marker can still
# collide with another row; escape the values if that matters for this data.
null_marker = "<NULL>"
hash_inputs = [
    coalesce(col(c).cast("string"), lit(null_marker)) for c in columns
]

df = df.withColumn(id_col, sha2(concat_ws("||", *hash_inputs), 256))
df = df.select([id_col] + columns)