forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyspark-sql-case-when.py
47 lines (41 loc) · 1.86 KB
/
pyspark-sql-case-when.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
"""
author SparkByExamples.com
"""
from pyspark.sql import SparkSession
# Start (or reuse) the Spark session that the examples below run on.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows of (name, gender, salary). One row has gender None, one has
# gender "" (empty string), and one has salary None.
data = [
    ("James", "M", 60000),
    ("Michael", "M", 70000),
    ("Robert", None, 400000),
    ("Maria", "F", 500000),
    ("Jen", "", None),
]
columns = ["name", "gender", "salary"]

# Build the DataFrame from the rows, using the list above as the column
# names, then print it.
df = spark.createDataFrame(data, columns)
df.show()
# Using when().otherwise()
from pyspark.sql.functions import when, col

# CASE-style column expression, built once and reused by both examples below.
# Branches are listed in order: "M" -> "Male", "F" -> "Female",
# NULL -> "" (empty string); any other value falls through to otherwise()
# and keeps the original gender value.
gender_label = (
    when(df.gender == "M", "Male")
    .when(df.gender == "F", "Female")
    .when(df.gender.isNull(), "")
    .otherwise(df.gender)
)

# Variant 1: append the derived column with withColumn().
df2 = df.withColumn("new_gender", gender_label)
df2.show()

# Variant 2: the same column via select(), keeping every existing column and
# naming the derived one with alias().
df2 = df.select(col("*"), gender_label.alias("new_gender"))
df2.show()
# Using SQL CASE WHEN
from pyspark.sql.functions import expr

# SQL text of the CASE expression, written once and reused by all three
# examples below. Each fragment ends with a space so that the joined string
# keeps its tokens separated (in particular '' and ELSE).
case_when_sql = (
    "CASE WHEN gender = 'M' THEN 'Male' "
    "WHEN gender = 'F' THEN 'Female' "
    "WHEN gender IS NULL THEN '' "
    "ELSE gender END"
)

# Variant 1: expr() inside withColumn().
df3 = df.withColumn("new_gender", expr(case_when_sql))
df3.show()

# Variant 2: expr() inside select(), keeping every existing column.
df4 = df.select(col("*"), expr(case_when_sql).alias("new_gender"))
df4.show()

# Variant 3: plain SQL against a temporary view named EMP; only name and the
# derived new_gender column are selected.
df.createOrReplaceTempView("EMP")
spark.sql("select name, " + case_when_sql + " as new_gender from EMP").show()