Python for Alteryx users
May 24, 2020
We will recreate this Alteryx workflow using Python
Text input
import pandas as pd
text_input = pd.DataFrame(
{ "Person" : [ "You" , "Me" , "Everyone else" ], "Number" : [ 1 , 1 , 99 ]}
)
text_input
Person
Number 0
You
1 1
Me
1 2
Everyone else
99
import pandas as pd
input_data = pd.read_csv( "https://query.data.world/s/whbjtalht4sigazly6hdnnb5ynqald" )
input_data.head()
Category
City
Country
Customer Name
Discount
Number of Records
Order Date
Order ID
Postal Code
Manufacturer
Product Name
Profit
Quantity
Region
Sales
Segment
Ship Date
Ship Mode
State
Sub-Category 0
Furniture
Henderson
United States
Claire Gute
0.00
1
11/8/2017
CA-2017-152156
42420.0
Bush
Bush Somerset Collection Bookcase
41.9136
2
South
261.9600
Consumer
11/11/2017
Second Class
Kentucky
Bookcases 1
Furniture
Henderson
United States
Claire Gute
0.00
1
11/8/2017
CA-2017-152156
42420.0
Hon
Hon Deluxe Fabric Upholstered Stacking Chairs,…
219.5820
3
South
731.9400
Consumer
11/11/2017
Second Class
Kentucky
Chairs 2
Office Supplies
Los Angeles
United States
Darrin Van Huff
0.00
1
6/12/2017
CA-2017-138688
90036.0
Universal
Self-Adhesive Address Labels for Typewriters b…
6.8714
2
West
14.6200
Corporate
6/16/2017
Second Class
California
Labels 3
Furniture
Fort Lauderdale
United States
Sean O’Donnell
0.45
1
10/11/2016
US-2016-108966
33311.0
Bretford
Bretford CR4500 Series Slim Rectangular Table
-383.0310
5
South
957.5775
Consumer
10/18/2016
Standard Class
Florida
Tables 4
Office Supplies
Fort Lauderdale
United States
Sean O’Donnell
0.20
1
10/11/2016
US-2016-108966
33311.0
Eldon
Eldon Fold ‘N Roll Cart System
2.5164
2
South
22.3680
Consumer
10/18/2016
Standard Class
Florida
Storage
selected_data = (
input_data.drop( columns = [ "Ship Date" , "Ship Mode" ]) # Example of deselecting
.loc[
:, [ "Order ID" , "Category" , "Sub-Category" , "Product Name" , "Sales" , "Quantity" ]
] # Example of selecting
.rename( columns = { "Sub-Category" : "Sub-Cat" , "Product Name" : "Product" })
.astype({ "Quantity" : "int64" , "Category" : "str" })
)
selected_data.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity 0
CA-2017-152156
Furniture
Bookcases
Bush Somerset Collection Bookcase
261.9600
2 1
CA-2017-152156
Furniture
Chairs
Hon Deluxe Fabric Upholstered Stacking Chairs,…
731.9400
3 2
CA-2017-138688
Office Supplies
Labels
Self-Adhesive Address Labels for Typewriters b…
14.6200
2 3
US-2016-108966
Furniture
Tables
Bretford CR4500 Series Slim Rectangular Table
957.5775
5 4
US-2016-108966
Office Supplies
Storage
Eldon Fold ‘N Roll Cart System
22.3680
2
selected_data[ "Total Sales" ] = selected_data[ "Sales" ] * selected_data[ "Quantity" ]
selected_data.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales 0
CA-2017-152156
Furniture
Bookcases
Bush Somerset Collection Bookcase
261.9600
2
523.9200 1
CA-2017-152156
Furniture
Chairs
Hon Deluxe Fabric Upholstered Stacking Chairs,…
731.9400
3
2195.8200 2
CA-2017-138688
Office Supplies
Labels
Self-Adhesive Address Labels for Typewriters b…
14.6200
2
29.2400 3
US-2016-108966
Furniture
Tables
Bretford CR4500 Series Slim Rectangular Table
957.5775
5
4787.8875 4
US-2016-108966
Office Supplies
Storage
Eldon Fold ‘N Roll Cart System
22.3680
2
44.7360
filtered_data = selected_data[selected_data[ "Sales" ] > 1000 ]
# To get the false
sales_under_1000 = selected_data[selected_data[ "Sales" ] < 1000 ]
filtered_data.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales 10
CA-2015-115812
Furniture
Tables
Chromcraft Rectangular Conference Tables
1706.184
9
15355.656 24
CA-2016-106320
Furniture
Tables
Bretford CR4500 Series Slim Rectangular Table
1044.630
3
3133.890 27
US-2016-150630
Furniture
Bookcases
Riverside Palais Royal Lawyers Bookcase, Royal…
3083.430
7
21584.010 35
CA-2017-117590
Technology
Phones
GE 30524EE4
1097.544
7
7682.808 54
CA-2017-105816
Technology
Phones
AT&T CL83451 4-Handset Telephone
1029.950
5
5149.750
sales_under_1000.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales 0
CA-2017-152156
Furniture
Bookcases
Bush Somerset Collection Bookcase
261.9600
2
523.9200 1
CA-2017-152156
Furniture
Chairs
Hon Deluxe Fabric Upholstered Stacking Chairs,…
731.9400
3
2195.8200 2
CA-2017-138688
Office Supplies
Labels
Self-Adhesive Address Labels for Typewriters b…
14.6200
2
29.2400 3
US-2016-108966
Furniture
Tables
Bretford CR4500 Series Slim Rectangular Table
957.5775
5
4787.8875 4
US-2016-108966
Office Supplies
Storage
Eldon Fold ‘N Roll Cart System
22.3680
2
44.7360
sorted_data = filtered_data.sort_values( by = "Sales" , ascending = False )
sorted_data.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales 9941
CA-2015-145317
Technology
Machines
Cisco TelePresence System EX90 Videoconferenci…
22638.480
6
135830.880 8093
CA-2017-118689
Technology
Copiers
Canon imageCLASS 2200 Advanced Copier
17499.950
5
87499.750 4905
CA-2018-140151
Technology
Copiers
Canon imageCLASS 2200 Advanced Copier
13999.960
4
55999.840 9707
CA-2018-127180
Technology
Copiers
Canon imageCLASS 2200 Advanced Copier
11199.968
4
44799.872 5297
CA-2018-166709
Technology
Copiers
Canon imageCLASS 2200 Advanced Copier
10499.970
3
31499.910
summarized_data = (
sorted_data.groupby([ "Category" , "Sub-Cat" ])
.agg({ "Sales" : "sum" })
.rename( columns = { "Sales" : "Sales by Sub-Cat" })
).reset_index()
summarized_data.head()
Category
Sub-Cat
Sales by Sub-Cat 0
Furniture
Bookcases
49521.8256 1
Furniture
Chairs
129547.8520 2
Furniture
Furnishings
5638.9320 3
Furniture
Tables
102125.0490 4
Office Supplies
Appliances
36742.7900
summarized_data_alternative = sorted_data.groupby([ "Category" , "Sub-Cat" ]).agg(
Sales_by_Sub_Cat = pd.NamedAgg( column = "Sales" , aggfunc = sum )
).reset_index()
summarized_data_alternative.head()
Category
Sub-Cat
Sales_by_Sub_Cat 0
Furniture
Bookcases
49521.8256 1
Furniture
Chairs
129547.8520 2
Furniture
Furnishings
5638.9320 3
Furniture
Tables
102125.0490 4
Office Supplies
Appliances
36742.7900
joined_data = pd.merge(
sorted_data, summarized_data, on = [ "Category" , "Sub-Cat" ], how = "inner"
)
joined_data.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales
Sales by Sub-Cat 0
CA-2015-145317
Technology
Machines
Cisco TelePresence System EX90 Videoconferenci…
22638.480
6
135830.880
162022.19 1
US-2017-107440
Technology
Machines
3D Systems Cube Printer, 2nd Generation, Magenta
9099.930
7
63699.510
162022.19 2
CA-2017-158841
Technology
Machines
HP Designjet T520 Inkjet Large Format Printer …
8749.950
5
43749.750
162022.19 3
CA-2015-139892
Technology
Machines
Lexmark MX611dhe Monochrome Laser Printer
8159.952
8
65279.616
162022.19 4
US-2018-168116
Technology
Machines
Cubify CubeX 3D Printer Triple Head Print
7999.980
4
31999.920
162022.19
left_join = pd.merge(
sorted_data, summarized_data, on = [ "Category" , "Sub-Cat" ], how = "left" , indicator = True
)
left_join = left_join[left_join[ "_merge" ] == "left_only" ]
left_join.drop( columns = "_merge" , inplace = True )
left_join.head()
Order ID
Category
Sub-Cat
Product
Sales
Quantity
Total Sales
Sales by Sub-Cat
unioned_data = joined_data.append(sales_under_1000)
unioned_data.head()
Category
Order ID
Product
Quantity
Sales
Sales by Sub-Cat
Sub-Cat
Total Sales 0
Technology
CA-2015-145317
Cisco TelePresence System EX90 Videoconferenci…
6
22638.480
162022.19
Machines
135830.880 1
Technology
US-2017-107440
3D Systems Cube Printer, 2nd Generation, Magenta
7
9099.930
162022.19
Machines
63699.510 2
Technology
CA-2017-158841
HP Designjet T520 Inkjet Large Format Printer …
5
8749.950
162022.19
Machines
43749.750 3
Technology
CA-2015-139892
Lexmark MX611dhe Monochrome Laser Printer
8
8159.952
162022.19
Machines
65279.616 4
Technology
US-2018-168116
Cubify CubeX 3D Printer Triple Head Print
4
7999.980
162022.19
Machines
31999.920
unioned_data.to_csv( "Sales summary.csv" , index = False )