From 43951cc8ed0c5e02b8ff21e47427c9d0c674cf05 Mon Sep 17 00:00:00 2001
From: divith raju <118492176+divithraju@users.noreply.github.com>
Date: Sun, 8 Sep 2024 13:45:36 +0530
Subject: [PATCH] Create Web-user.py

Add Web-user.py for PySpark and update contributors list

This pull request adds the Web-user.py script for PySpark functionality
and includes myself (divith raju) in the contributors list.
---
Note: the "index" header line is omitted because the blob id recorded
there no longer matches the file content carried by this patch.

 Web-user.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 Web-user.py

diff --git a/Web-user.py b/Web-user.py
new file mode 100644
--- /dev/null
+++ b/Web-user.py
@@ -0,0 +1,36 @@
+"""Count the unique visitors to a website per day with PySpark."""
+
+from pyspark.sql import SparkSession, Row
+from pyspark.sql.functions import countDistinct
+
+
+def main():
+    """Build a small sample visit log and print distinct visitors per date."""
+    spark = SparkSession.builder.appName("UniqueVisitorsPerDay").getOrCreate()
+    try:
+        # Sample data: one row per visit; a visitor can repeat within a day.
+        visitor_data = [
+            Row(Date='2023-01-01', VisitorID=101),
+            Row(Date='2023-01-01', VisitorID=102),
+            Row(Date='2023-01-01', VisitorID=101),
+            Row(Date='2023-01-02', VisitorID=103),
+            Row(Date='2023-01-02', VisitorID=101),
+        ]
+        df_visitors = spark.createDataFrame(visitor_data)
+
+        # groupBy does not guarantee row order, so sort by date to keep the
+        # printed table stable from run to run.
+        unique_visitors = (
+            df_visitors.groupBy('Date')
+            .agg(countDistinct('VisitorID').alias('UniqueVisitors'))
+            .orderBy('Date')
+        )
+        unique_visitors.show()
+    finally:
+        # Stop the session even if the job fails, so Spark resources are
+        # released.
+        spark.stop()
+
+
+if __name__ == "__main__":
+    main()