Digging into question 3

2023-03-06 15:15:02 +02:00
parent 2d44b377fb
commit d2cc765ed1
1 changed files with 35 additions and 9 deletions
--- a/R_Final_Tasks_Statistics.R
+++ b/R_Final_Tasks_Statistics.R
@@ -8,27 +8,39 @@ chip <- read.csv("/home/shmick/Downloads/chip_dataset.csv")
 ##BONUS: convert from EPOCH: as.Date(as.POSIXct(1100171890,origin = "1970-01-01"))
 #View(chip)
 ##For question 1, we have chosen to examine which type of chip has examined the greater improvement over the years - GPU chips or CPU chips.
-#+As chip perfomance is most directly correlated with the number of transistors, we have measured the pace of development based on pace of 
+#+As chip performance is most directly correlated with the number of transistors, we have measured the pace of development based on pace of 
 #+increasing transistor count.
 CPU <- chip[chip$Type == 'CPU',]
 CPU <- subset(CPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.))
 GPU <- chip[chip$Type == 'GPU',]
 GPU <- subset(GPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.))
 #Calculate a crude 'performance factor' - the number of transistors multiplied by their frequency.
-CPU["Performance Factor"] <- CPU$Transistors..million.*CPU$Freq..MHz.
-GPU["Performance Factor"] <- GPU$Transistors..million.*GPU$Freq..MHz.
-View(CPU)
-View(GPU)
+#CPU["Performance Factor"])
 #Range of total transistor advancement
 max(CPU$Transistors..million.,na.rm=TRUE) - min(CPU$Transistors..million.,na.rm=TRUE)
 max(GPU$Transistors..million.,na.rm=TRUE) - min(GPU$Transistors..million.,na.rm=TRUE)
 #Omit chips with missing data
-CPU <- na.omit(CPU)
-GPU <- na.omit(GPU)
+#CPU <- na.omit(CPU)
+#GPU <- na.omit(GPU)
 ##Iterate over date entries
 #for (i in 1:length(CPU$Release.Date)){print(i)}
 ##Get date
-#for (i in 1:length(CPU$Release.Date)){print(CPU$Release.Date[i])}
+##Install the 'lubridate' package to deal with conversion to EPOCH time
+#install.packages('lubridate')
+#library(lubridate)
+#dates <- strptime(CPU$Release.Date,format="%Y-%m-%d")
+#as.integer(as.POSIXct(CPU$Release.Date))
+#posix_format_date <- c()
+#for (date in 1:length(CPU$Release.Date)){
+#  cat("Date is", date)
+#  human_format_date <- CPU$Release.Date[date]
+#  print(human_format_date)
+#  posix_format_date[date] <- strptime(human_format_date,format="%Y-%m-%d")
+#}
+#for (i in CPU$Release.Date){
+#  print(i)
+#}
+

 ##QUESTION 2: measure number of columns in our dataset and calculate a permutation and combination of 
 #+that number, minus two, and 3.
@@ -37,4 +49,18 @@ GPU <- na.omit(GPU)
 #n <- ncol(kernel_commits)
 #View(n)

-##QUESTION 3: pick two categorcial variables - month (?), is documentation
+##QUESTION 3: pick two categorical variables (Chip type, foundry) and see whether they're dependent
+#+1. Probability of chip type
+#+2. Probability of foundry
+#+3. Multiply
+
+#Sample 1 variable from 'Type' column
+chip_type_sample <- sample(chip$Type,1)
+#Proportion of rows in its column that match the sampled value (count / column length)
+p_chip_type_sample <- (length(which(chip$Type==chip_type_sample)))/length(chip$Type)
+chip_foundry_sample <- sample (chip$Foundry,1)
+p_chip_foundry_sample <- (length(which(chip$Foundry==chip_foundry_sample)))/length(chip$Foundry)
+chip_type_sample_matrix <- chip[chip$Type == chip_type_sample,]
+p_chip_type_foundry_sample <- (length(which(chip_type_sample_matrix$Foundry==chip_foundry_sample)))/length(chip_type_sample_matrix$Foundry)
+#p_victim_bastard <- p_neo_bastard * nrow(CPU )
+p_chip_type_foundry_sample * p_chip_type_sample