diff --git a/R_Final_Tasks_Statistics.R b/R_Final_Tasks_Statistics.R index 23e09ca..d3db9d9 100755 --- a/R_Final_Tasks_Statistics.R +++ b/R_Final_Tasks_Statistics.R @@ -3,27 +3,38 @@ #+We have chosen a dataset of CPU and GPU performance trends since 2000 - as published on Kaggle: #+https://www.kaggle.com/datasets/michaelbryantds/cpu-and-gpu-product-data -raw_perf_data <- read.csv("/home/shmick/Downloads/chip_dataset.csv") +chip <- read.csv("/home/shmick/Downloads/chip_dataset.csv") +#chip <- na.omit(chip) ##BONUS: convert from EPOCH: as.Date(as.POSIXct(1100171890,origin = "1970-01-01")) -View(raw_perf_data) +#View(chip) ##For question 1, we have chosen to examine which type of chip has examined the greater improvement over the years - GPU chips or CPU chips. #+As chip perfomance is most directly correlated with the number of transistors, we have measured the pace of development based on pace of #+increasing transistor count. CPU <- chip[chip$Type == 'CPU',] +CPU <- subset(CPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.)) GPU <- chip[chip$Type == 'GPU',] - -CPU_Transistor_Count <- order(CPU$Transistors..million.) -GPU_Transistor_Count <- order(GPU$Transistors..million.) +GPU <- subset(GPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.)) +#Calculate a crude 'performance factor' - the number of transistors multiplied by their frequency. +CPU["Performance Factor"] <- CPU$Transistors..million.*CPU$Freq..MHz. +GPU["Performance Factor"] <- GPU$Transistors..million.*GPU$Freq..MHz. 
+View(CPU) +View(GPU) +#Range of total transistor advancement +max(CPU$Transistors..million.,na.rm=TRUE) - min(CPU$Transistors..million.,na.rm=TRUE) +max(GPU$Transistors..million.,na.rm=TRUE) - min(GPU$Transistors..million.,na.rm=TRUE) +#Omit chips with missing data +CPU <- na.omit(CPU) +GPU <- na.omit(GPU) ##Iterate over date entries -for (i in 1:length(CPU$Release.Date)){print(i)} +#for (i in 1:length(CPU$Release.Date)){print(i)} ##Get date -for (i in 1:length(CPU$Release.Date)){print(CPU$Release.Date[i])} +#for (i in 1:length(CPU$Release.Date)){print(CPU$Release.Date[i])} ##QUESTION 2: measure number of columns in our dataset and calculate a permutation and combination of #+that number, minus two, and 3. #Calculate total number of columns in our dataset -n <- ncol(kernel_commits) -View(n) +#n <- ncol(kernel_commits) +#View(n) -##QUESTION 3: pick two categorial variables - month (?), is documentation \ No newline at end of file +##QUESTION 3: pick two categorical variables - month (?), is documentation \ No newline at end of file