library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function
library(dplyr)
library(stringr)
system("ls ../input")
data <- read_csv("F:/Documents/Fleet Data.csv")
#making the data easier to deal with
colnames(data) <- c("parent_airline","airline","aircraft_type","current","future","historic","total","orders","unit_cost","total_cost","average_age")
#nrow(data)
data$total_cost <- as.numeric(gsub(",","",substring(data$total_cost,2)))
data$unit_cost <- as.numeric(gsub(",","",substring(data$unit_cost,2)))
data$plane_brand <- word(data$aircraft_type,1)
data <- data %>% mutate(newcost=ifelse(!is.na(orders),orders*unit_cost,0))
#adding flags for airbus and boeing
data <- data %>% mutate(airbus=ifelse(plane_brand=="Airbus",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(boeing=ifelse(plane_brand=="Boeing",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(embraer=ifelse(plane_brand=="Embraer",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(bombardier=ifelse(plane_brand=="Bombardier",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(mitsubishi=ifelse(plane_brand=="Mitsubishi",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(embraer=ifelse(plane_brand=="Embraer",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(comac=ifelse(plane_brand %in% c("COMAC","comac"),ifelse(is.na(current),0,current),0))
data <- data %>% mutate(mcdonnell=ifelse(plane_brand=="McDonnell",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(canadair=ifelse(plane_brand=="Canadair",ifelse(is.na(current),0,current),0))
data <- data %>% mutate(atr=ifelse(plane_brand=="ATR",ifelse(is.na(current),0,current),0))
#table(data$plane_brand)
#adding flags for future airbus and future boeing
data <- data %>% mutate(airbus_future=ifelse(plane_brand=="Airbus",ifelse(is.na(orders),0,orders),0))
data <- data %>% mutate(boeing_future=ifelse(plane_brand=="Boeing",ifelse(is.na(orders),0,orders),0))
#The below list is not exhaustive.
lowcostcarriers<- c("AirAsia X","Gol Linhas Aereas","Azul","Tigerair Australia","Volaris","Interjet","Spirit Airlines","Frontier Airlines","Allegiant Air","WestJet","Air Canada Rouge","FlyDubai","Up","Monarch Airlines","Jet2","Flybe","Vueling","Iberia Express","Pobeda","Transavia","Germanwings","Eurowings","easyJet","Wizz Air","Lion Air","IndiGo", "Southwest Airlines", "RyanAir","AirAsia","LionAir","JetBlue Airways","Norwegian Air","WestJet","Pegasus Airlines","Flybe","SpiceJet","Air Arabia Egypt","Air Arabia Maroc","Air Cairo","Jambojet Limited","Mango","China United Airlines","Spring Airlines","Cebu Pacific Air","Jetstar Airways","AirAsia India","AirAsia Japan")
data$type <- NA
data <- data %>% mutate(type=ifelse(airline %in% lowcostcarriers,"Low Cost","Traditional"))
head(data %>% filter(type=="Low Cost"))
length(unique(data$airline))
length(unique(data$parent_airline))
#Let's find largest airlines
#lets find out which airlines have the largest fleets
airline_fleet <- data %>% group_by(airline) %>% summarise(fleet=sum(current,na.rm=TRUE),historic=sum(historic,na.rm=TRUE),orders=sum(orders,na.rm=TRUE),ratio=100*(orders/fleet),current_fleet_cost=sum(total_cost,na.rm=TRUE),value_per_fleet_unit=current_fleet_cost/fleet)
airline_fleet <- airline_fleet %>% mutate(type=ifelse(airline %in% lowcostcarriers,"Low Cost","Traditional"))
head(airline_fleet %>% arrange(desc(fleet)))
#let's find out which airlines have the most orders
head(airline_fleet %>% arrange(desc(orders)))
#ratio of orders vs current fleet for fleets more than 10
#head(airline_fleet %>% filter(fleet>10) %>% arrange(desc(ratio)))
#largest fleet costs carriers
#head(airline_fleet %>% filter(fleet>10) %>% arrange(desc(current_fleet_cost)))
#plot current vs future
g <- ggplot(airline_fleet,aes(x=fleet,y=orders,label=airline,color=as.factor(type)))+geom_point(size=0.5) +geom_text(size=2,hjust=0,vjust=0,nudge_x=7,nudge_y=-2)+ theme_minimal()
#geom_text(aes(label=rownames(airline_fleet)))
g + geom_smooth(aes(color=NA),method=lm,se=FALSE) + ggtitle("New Orders vs Existing Fleet Size")+labs(x="Current Fleet Size (Units)",y="New Orders (Units)")
g <- ggplot(airline_fleet %>% filter(fleet<=200),aes(x=fleet,y=orders,label=airline,color=as.factor(type)))+geom_point(size=0.5) +geom_text(size=2,hjust=0,vjust=0,nudge_x=3,nudge_y=-2)+ theme_minimal()
#geom_text(aes(label=rownames(airline_fleet)))
g + ggtitle("New Orders vs Existing Fleet Size")+labs(x="Current Fleet Size (Units)",y="New Orders (Units)")
neworders <-data %>% filter(!is.na(orders)) %>% filter(orders>0) %>% group_by(aircraft_type) %>% summarise(orders=sum(orders),cost=mean(unit_cost))
newplanes<-c("Airbus A380","Airbus A350","Airbus A350 XWB","Airbus A350-900","Boeing 787","Boeing 787 Dreamliner")
neworders <- neworders %>% mutate(new=ifelse(aircraft_type%in%newplanes,"new","old"))
head(neworders %>% arrange(desc(orders)))
ggplot(neworders,aes(x=orders,y=cost,label=aircraft_type,color=(new)))+geom_point(size=0.5)+geom_text(size=2,hjust=0,vjust=0,nudge_x=20,nudge_y=-2)+ggtitle("New orders vs Cost")+labs(x="New Orders (units)",y="Unit Plane Cost (USD M)")+theme_minimal()
buyingcheap <- data %>% filter(!is.na(orders)) %>% filter(orders>0) %>% filter(aircraft_type %in% c("Airbus A320","Boeing 737")) %>% group_by(airline) %>% summarise(orders=sum(orders))
head(buyingcheap %>% arrange(desc(orders)))
buying380 <- data %>% filter(!is.na(orders)) %>% filter(orders>0) %>% filter(aircraft_type=="Airbus A380") %>% group_by(airline) %>% summarise(orders=sum(orders))
head(buying380 %>% arrange(desc(orders)))
buying787DL <- data %>% filter(!is.na(orders)) %>% filter(orders>0) %>% filter(aircraft_type=="Boeing 737") %>% group_by(airline) %>% summarise(orders=sum(orders))
head(buying787DL %>% arrange(desc(orders)))
current_aircrafts <- data %>% filter(!is.na(current)) %>% filter(current>0) %>% group_by(plane_brand) %>% summarise(current=sum(current),cost=sum(total_cost,na.rm=TRUE))
head(current_aircrafts %>% arrange(desc(current)))
bycompany <- data %>% group_by(airline) %>% summarise(total=sum(current,na.rm=TRUE),airbus=sum(airbus),boeing=sum(boeing),airbusper=round(100*airbus/total),boeingper=round(100*boeing/total),airbusadd=sum(airbus_future),boeingadd=sum(boeing_future),newtotal=total+airbusadd+boeingadd,newairbusper=round(100*(airbus+airbusadd)/newtotal))
bycompany <- bycompany %>% mutate(type=ifelse(airline %in% lowcostcarriers,"Low Cost","Traditional"))
head(bycompany %>% filter(total> 300) %>% arrange(desc(boeingper)))
head(bycompany %>% filter(total> 300) %>% arrange(desc(airbusper)))
ggplot(data=bycompany,aes(x=airbusper,y=newairbusper,label=airline,color=type))+geom_point(size=0.5)+geom_text(size=2,hjust=0,vjust=0,nudge_x=1,nudge_y=-1)+theme_minimal()+ggtitle("Evolution (Rough Estimation) of Airbus Mix in the Future")+labs(x="Current Fleet Percentage of Airbus Planes (%)")+labs(y="Estimated Future Fleet Percentage of Airbus Planes (%)")
business <-data %>% filter(!is.na(orders)) %>% filter(orders>0) %>% group_by(plane_brand) %>% summarise(orders=sum(orders),cost=sum(newcost))
head(business %>% arrange(desc(cost)))
fleets <- data %>% group_by(airline) %>% summarise(total=sum(current,na.rm=TRUE),airbus=sum(airbus),boeing=sum(boeing),embraer=sum(embraer),bombardier=sum(bombardier),atr=sum(atr),mcdonnell=sum(mcdonnell),canadair=sum(canadair))
#head(fleets)
#summary(fleets)
fleets<-as.data.frame(fleets)
row.names(fleets)<-fleets[,1]
fleets.pca <- prcomp(fleets[,3:ncol(fleets)],center=TRUE,scale=TRUE)
print(fleets.pca)
biplot(fleets.pca,cex=0.4,xlim=c(-0.2,0.6))