Output the details of the employees whose salary is the second highest in their department.
Note: Include all employees tied for second place.
Input Data
Expected Output
Input DataFrame Script
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
# Sample employee records as (EmployeeID, EmployeeName, Department, Salary)
# tuples: five employees for each of five departments.
data = [
    # HR
    ("E001", "Alice Johnson", "HR", 58000),
    ("E002", "Brian Smith", "HR", 60000),
    ("E003", "Catherine Li", "HR", 62000),
    ("E004", "Daniel Brown", "HR", 59000),
    ("E005", "Eva Davis", "HR", 60500),
    # IT
    ("E006", "Frank White", "IT", 85000),
    ("E007", "Grace Hall", "IT", 87000),
    ("E008", "Henry Green", "IT", 83000),
    ("E009", "Isabella King", "IT", 89000),
    ("E010", "Jackie Young", "IT", 86000),
    # Sales
    ("E011", "Karen Scott", "Sales", 67000),
    ("E012", "Liam Wright", "Sales", 70000),
    ("E013", "Mia Moore", "Sales", 72000),
    ("E014", "Nathan Clark", "Sales", 68000),
    ("E015", "Olivia Lewis", "Sales", 71000),
    # Finance
    ("E016", "Paul Turner", "Finance", 75000),
    ("E017", "Quincy Allen", "Finance", 77000),
    ("E018", "Rachel Adams", "Finance", 76000),
    ("E019", "Steven Baker", "Finance", 74000),
    ("E020", "Tina Nelson", "Finance", 75500),
    # Marketing
    ("E021", "Uma Cox", "Marketing", 66000),
    ("E022", "Victor Diaz", "Marketing", 69000),
    ("E023", "Wendy Fox", "Marketing", 64000),
    ("E024", "Xavier Graham", "Marketing", 67500),
    ("E025", "Yvonne Hart", "Marketing", 68500),
]

# Column layout matching the tuples above. Every column is declared nullable;
# the three identifying columns are strings and Salary is an integer.
schema = (
    StructType()
    .add("EmployeeID", StringType(), True)
    .add("EmployeeName", StringType(), True)
    .add("Department", StringType(), True)
    .add("Salary", IntegerType(), True)
)

# Build the input DataFrame and render it.
# NOTE(review): `spark` and `display` are not defined in this snippet --
# presumably they are supplied by the notebook runtime; confirm before
# running this as a standalone script.
fabricofdata_df = spark.createDataFrame(data, schema)
display(fabricofdata_df)
Try solving the question yourself! If you need help, click below to reveal the solution.