# R_Final_Tasks_Statistics.R
#
# Statistics exercises on the chip dataset: independence of chip type and
# foundry (question 3), binning FP16 throughput (question 4), a hand-computed
# median (question 5), a normal CDF from sampled values (question 6) and
# z-score transformations of FP64 throughput (question 7).
#
# NOTE(review): reconstructed from the post-change side of a diff whose line
# breaks were lost. Only the changed regions were visible; the code between
# the "Range of total transistor advancement" header and question 3 is not
# reproduced here and has to be restored from the repository.

chip <- read.csv("/home/shmick/Downloads/chip_dataset.csv")

#+As chip performance is most directly correlated with the number of
#+transistors, we have measured the pace of development based on pace of
#+increasing transistor count.
CPU <- chip[chip$Type == "CPU", ]
# CPU <- subset(CPU, select = c(Product, Type, Release.Date,
#   Process.Size..nm., TDP..W., Die.Size..mm.2., Transistors..million.,
#   Freq..MHz.))
GPU <- chip[chip$Type == "GPU", ]
# GPU <- subset(GPU, select = c(Product, Type, Release.Date,
#   Process.Size..nm., TDP..W., Die.Size..mm.2., Transistors..million.,
#   Freq..MHz.))
# Calculate a crude 'performance factor' - the number of transistors
# multiplied by their frequency.
# CPU["Performance Factor"])
# Range of total transistor advancement
# NOTE(review): the statements that followed this header were outside the
# visible diff hunks - restore them here.

#+3. Multiply
# Sample one value from the 'Type' column and estimate its probability as the
# share of rows that carry it. NA comparisons are not counted as matches.
sampled_type <- sample(chip$Type, 1)
p_sampled_type <- sum(chip$Type == sampled_type, na.rm = TRUE) / nrow(chip)

# Same for the 'Foundry' column.
sampled_foundry <- sample(chip$Foundry, 1)
p_sampled_foundry <-
  sum(chip$Foundry == sampled_foundry, na.rm = TRUE) / nrow(chip)

# Conditional probability P(foundry | type). which() drops NA comparisons, so
# rows with a missing Type cannot enter the subset as all-NA rows.
sampled_type_matrix <- chip[which(chip$Type == sampled_type), ]
p_sampled_foundry_in_sampled_type <-
  sum(sampled_type_matrix$Foundry == sampled_foundry, na.rm = TRUE) /
  nrow(sampled_type_matrix)

# Joint probability P(type and foundry) = P(foundry | type) * P(type).
p_sampled_chip_and_foundry <-
  p_sampled_foundry_in_sampled_type * p_sampled_type

# Independence means P(A and B) equals P(A) * P(B). Both sides are
# floating-point ratios, so compare with a tolerance instead of exact `==`.
if (isTRUE(all.equal(p_sampled_chip_and_foundry,
                     p_sampled_type * p_sampled_foundry))) {
  print("Independent")
} else {
  print("Dependent")
}

# Question 4 - 'Amazing'
# Keep only GPU rows without any missing value; every later question works on
# this reduced data frame.
GPU <- na.omit(GPU)
if (nrow(GPU) == 0) {
  stop("No GPU rows without missing values; cannot classify FP16 GFLOPS.",
       call. = FALSE)
}
fp16_gflops <- GPU$FP16.GFLOPS

# Split the observed range into three equal-width bands. The cut points are
# offset by the minimum so the bands cover [min, max] rather than [0, range].
fp16_min <- min(fp16_gflops)
fp16_range <- max(fp16_gflops) - fp16_min
fp16_low_threshold <- fp16_min + fp16_range / 3
fp16_medium_threshold <- fp16_min + 2 * fp16_range / 3

# Label every value in one vectorised pass: low <= first cut point,
# medium <= second cut point, high above it.
amazing <- ifelse(
  fp16_gflops <= fp16_low_threshold,
  "low",
  ifelse(fp16_gflops <= fp16_medium_threshold, "medium", "high")
)
for (i in seq_along(amazing)) {
  cat(fp16_gflops[i], " is ", amazing[i], "\n", sep = "")
}
amazing
GPU["Amazing"] <- amazing

# Question 5
# Median computed by hand from the sorted values.
sorted_fp16_gflops <- sort(fp16_gflops)
fp16_gflops_length <- length(fp16_gflops)
if (fp16_gflops_length %% 2 == 0) {
  print("Dataset is even")
  # Even count: the median is the average of the two middle elements.
  fp16_gflops_medians <- c(fp16_gflops_length / 2, fp16_gflops_length / 2 + 1)
  fp16_gflops_median <- mean(sorted_fp16_gflops[fp16_gflops_medians])
} else {
  print("Vector is odd")
  # Odd count: the median is the single middle element.
  fp16_gflops_median_index <- (fp16_gflops_length + 1) / 2
  fp16_gflops_median <- sorted_fp16_gflops[fp16_gflops_median_index]
}
cat("Median is:", fp16_gflops_median, "\n")

# Question 6
# Draw three non-missing, non-negative FP32 values (with replacement) and use
# them as quantile, mean and variance of a normal distribution.
fp32_valid <- chip$FP32.GFLOPS[
  !is.na(chip$FP32.GFLOPS) & chip$FP32.GFLOPS >= 0
]
if (length(fp32_valid) == 0) {
  stop("No non-missing, non-negative FP32.GFLOPS values to sample from.",
       call. = FALSE)
}
# Sample indices rather than values: sample(x, ...) treats a single number x
# as the range 1:x.
sampled_fp_32_gflops <-
  fp32_valid[sample.int(length(fp32_valid), 3, replace = TRUE)]
pnorm(
  sampled_fp_32_gflops[1],
  mean = sampled_fp_32_gflops[2],
  sd = sqrt(sampled_fp_32_gflops[3])
)

# Question 7
fp64_gflops <- na.omit(GPU$FP64.GFLOPS)
fp64_mean <- mean(fp64_gflops)
fp64_sd <- sd(fp64_gflops)
fp64_mean
var(fp64_gflops)
zscore <- (fp64_gflops - fp64_mean) / fp64_sd
# fp64_gflops_trans <- (fp64_gflops * 2 + 16)
# Linear transformation: the z-score formula with the slope scaled by 2000.
zscore_lin_trans <- (2000 / fp64_sd) * fp64_gflops - fp64_mean / fp64_sd
# Non-linear transformation: the slope itself depends on the value
# (x^-0.7 * x, written as x^0.3 so that x = 0 does not yield Inf * 0 = NaN).
zscore_non_lin_trans <- fp64_gflops^0.3 / fp64_sd - fp64_mean / fp64_sd

# The colour must be a string; a bare `blue` is an undefined object.
plot(zscore_lin_trans, zscore_non_lin_trans, col = "blue")
# plot(zscore, zscore_lin_trans)
doubled_zscore <- zscore * 2