Compare commits
11 Commits
84fda70cc6
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 52a300cd77 | |||
| a27137a2b1 | |||
| 9a0cf35684 | |||
| 193143a79f | |||
| e7f055c670 | |||
| d2cc765ed1 | |||
| 2d44b377fb | |||
| 6bd14b5476 | |||
| a40330b3ac | |||
| 7df5894056 | |||
| f7850cf2bb |
14
03112022.R
Normal file
14
03112022.R
Normal file
@@ -0,0 +1,14 @@
|
||||
# 03112022.R — class exercises: exponentiation, factorials, and a basic loop.

# Exponentiation: 8 to the power of 2.
une <- 8^2

# Factorial of a number ([num]!).
duex <- factorial(100)

# Exercise from slide with wee buggers. Also, this thing autocorrects comments. Wee shite.
wee_buggers <- factorial(6) * factorial(4)
# This is a multiply ^ and it is important

colorful_wee_buggers <- factorial(4) * factorial(5) * factorial(6) * factorial(3)
# That is the arrangement of the groups ^

# Now, try being a bastard: iterate over a vector, printing each element.
vectroful_wee_buggers <- c(4, 5, 6)
for (wee_bugger in vectroful_wee_buggers) {
  print(wee_bugger)
}
24
23112022.R
Normal file
24
23112022.R
Normal file
@@ -0,0 +1,24 @@
|
||||
# 23112022.R — simulate 100 die rolls and count the ones, two ways.

# Specify number of elements.
count <- 100

# Create vector of COUNT die rolls ranging from 1 to 6 (with replacement).
hundred <- sample(1:6, count, replace = TRUE)

# Keep count of 1's in a separate variable.
one_count <- 0

# Iterate over the rolls and count the 1's by hand.
for (i in hundred) {
  # If the current element is 1, add to the count variable.
  if (i == 1) {
    print("i is 1")
    one_count <- one_count + 1
  }
}

### OR: vectorized — each TRUE counts as 1 when summed.
also_one_count <- sum(hundred == 1)

cat("One count is", one_count, "and also", also_one_count)

### Relative frequency of ones.
rel_one <- one_count / count

### OR — fixed: the original read `also_rel_one < - sum(...)`, which R parses
### as a comparison against a negated value (not an assignment), so the
### variable was never created. That is "why this one is different".
also_rel_one <- sum(hundred == 1) / length(hundred)

# Fixed: the original printed also_one_count (the raw count) here instead of
# the second relative-frequency value.
cat("Relative frequency of one is", rel_one, "and also", also_rel_one)
||||
160
R_Final_Tasks_Statistics.R
Executable file
160
R_Final_Tasks_Statistics.R
Executable file
@@ -0,0 +1,160 @@
|
||||
## Final R assignment in Intro to Statistics course, fall semester.
#+ Written by Matan Horovitz (207130253) and Guy Amzaleg ()
#+ We have chosen a dataset of CPU and GPU performance trends since 2000 - as
#+ published on Kaggle:
#+ https://www.kaggle.com/datasets/michaelbryantds/cpu-and-gpu-product-data

# NOTE(review): hard-coded absolute path — the script only runs on this
# machine. Consider a relative path or a command-line argument.
chip <- read.csv("/home/shmick/Downloads/chip_dataset.csv")
#chip <- na.omit(chip)
## BONUS: convert from EPOCH: as.Date(as.POSIXct(1100171890, origin = "1970-01-01"))
#View(chip)

## For question 1, we examine which type of chip has seen the greater
#+ improvement over the years - GPU chips or CPU chips. As chip performance is
#+ most directly correlated with the number of transistors, we measure the
#+ pace of development by the pace of increasing transistor count.

# Split the dataset by chip type.
CPU <- chip[chip$Type == 'CPU', ]
#CPU <- subset(CPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.))
GPU <- chip[chip$Type == 'GPU', ]
#GPU <- subset(GPU, select= c(Product,Type,Release.Date,Process.Size..nm.,TDP..W.,Die.Size..mm.2.,Transistors..million.,Freq..MHz.))

# Calculate a crude 'performance factor' - the number of transistors
# multiplied by their frequency.
#CPU["Performance Factor"])

# Range of total transistor advancement for each chip type (NA-safe).
max(CPU$Transistors..million., na.rm = TRUE) - min(CPU$Transistors..million., na.rm = TRUE)
max(GPU$Transistors..million., na.rm = TRUE) - min(GPU$Transistors..million., na.rm = TRUE)

# Omit chips with missing data
#CPU <- na.omit(CPU)
#GPU <- na.omit(GPU)

## Abandoned experiments: iterating over release dates and converting them to
## epoch time with the 'lubridate' package. (Fixed in-comment typo: the loop
## below originally started with `#or (...)` — a truncated `for`.)
#install.packages('lubridate')
#library(lubridate)
#dates <- strptime(CPU$Release.Date,format="%Y-%m-%d")
#as.integer(as.POSIXct(CPU$Release.Date))
#posix_format_date <- c()
#for (date in 1:length(CPU$Release.Date)){
#  cat("Date is", date)
#  human_format_date <- CPU$Release.Date[date]
#  print(human_format_date)
#  posix_format_date[date] <- strptime(human_format_date,format="%Y-%m-%d")
#}
#for (i in CPU$Release.Date){
#  print(i)
#}
||||
## QUESTION 2: measure number of columns in our dataset and calculate a
#+ permutation and combination of that number, minus two, and 3.

# Calculate total number of columns in our dataset
#n <- ncol(kernel_commits)
#View(n)

## QUESTION 3: pick two categorical variables (Chip type, foundry) and see
#+ whether they're dependent:
#+ 1. Probability of chip type
#+ 2. Probability of foundry
#+ 3. Multiply

# Sample 1 value from the 'Type' column.
sampled_type <- sample(chip$Type, 1)

# P(type): how often the sampled type appears in its column.
p_sampled_type <- length(which(chip$Type == sampled_type)) / length(chip$Type)

# Same for foundry: P(foundry).
sampled_foundry <- sample(chip$Foundry, 1)
p_sampled_foundry <- length(which(chip$Foundry == sampled_foundry)) / length(chip$Foundry)

# Conditional probability P(foundry | type), computed within the rows of the
# sampled type only.
sampled_type_matrix <- chip[chip$Type == sampled_type, ]
p_sampled_foundry_in_sampled_type <-
  length(which(sampled_type_matrix$Foundry == sampled_foundry)) /
  length(sampled_type_matrix$Foundry)

# Joint probability: P(type) * P(foundry | type).
p_sampled_chip_and_foundry <- p_sampled_foundry_in_sampled_type * p_sampled_type

# Independence check: P(A and B) == P(A) * P(B)?
# Fixed: the original compared the two floating-point products with `==`,
# which fails on rounding noise; use all.equal() with its numeric tolerance.
if (isTRUE(all.equal(p_sampled_chip_and_foundry,
                     p_sampled_type * p_sampled_foundry))) {
  print("Independent")
} else {
  print("Dependent")
}
|
||||
|
||||
# Question 4 - 'Amazing': bucket each GPU's FP16 GFLOPS into low/medium/high.
GPU <- na.omit(GPU)
fp16_gflops <- na.omit(GPU$FP16.GFLOPS)

# Total range of FP16 GFLOPS, rounded to 2 decimal places.
fp16_range <- as.numeric(sprintf("%.2f", max(GPU$FP16.GFLOPS, na.rm = TRUE) -
                                   min(GPU$FP16.GFLOPS, na.rm = TRUE)))

# Bucket boundaries at one third and two thirds of the range.
# NOTE(review): these thresholds are offsets from zero, not from
# min(fp16_gflops); if the minimum is far above zero most values classify
# as "high" — confirm intent.
fp16_low_threshold <- fp16_range / 3
fp16_medium_threshold <- fp16_low_threshold * 2

# Create empty vector named 'amazing'; one label per element of fp16_gflops.
amazing <- c()

# Iterate over the values and classify each one. (Fixed: stale comments
# copied from amazing.R referred to the 1..3 / 3..6 buckets of that script;
# also seq_along() replaces 1:length(), and the redundant `next` statements
# are dropped — the else-if chain already makes the branches exclusive.)
for (i in seq_along(fp16_gflops)) {
  fp16_gflop <- fp16_gflops[i]
  if (fp16_gflop <= fp16_low_threshold) {
    cat(fp16_gflop, "is low\n")
    amazing[i] <- "low"
  } else if (fp16_gflop > fp16_low_threshold && fp16_gflop <= fp16_medium_threshold) {
    cat(fp16_gflop, "is medium\n")
    amazing[i] <- "medium"
  } else if (fp16_gflop > fp16_medium_threshold) {
    cat(fp16_gflop, "is high\n")
    amazing[i] <- "high"
  } else {
    cat(fp16_gflop, "is unknown\n")
  }
}

amazing
# Attach the labels as a new column of GPU.
GPU["Amazing"] <- amazing
|
||||
# Question 5: compute the median of fp16_gflops by hand, handling even and
# odd lengths separately.
sorted_fp16_gflops <- sort(fp16_gflops)
fp16_gflops_length <- length(fp16_gflops)

# If the length of the sorted vector is divisible by 2...
if ((fp16_gflops_length %% 2) == 0) {
  print("Dataset is even")
  # ...take the indices of the two middle elements...
  fp16_gflops_medians <- c(fp16_gflops_length / 2, (fp16_gflops_length / 2) + 1)
  # ...and average them; that average is the median. (Fixed comment: the
  # original said "that is the mean".)
  fp16_gflops_median <- mean(sorted_fp16_gflops[fp16_gflops_medians])
} else {
  # Odd length: the median is the single middle element at index (n + 1) / 2.
  print("Vector is odd")
  fp16_gflops_median_index <- (fp16_gflops_length + 1) / 2
  fp16_gflops_median <- sorted_fp16_gflops[fp16_gflops_median_index]
}

cat("Median is:", fp16_gflops_median)
|
||||
# Question 6: draw three usable FP32 GFLOPS values, then feed them into
# pnorm() as (quantile, mean, variance -> sd via sqrt).
sampled_fp_32_gflops <- c()

for (i in 1:3) {
  cat("On ", i, "\n")
  sampled_fp_32_gflop <- sample(chip$FP32.GFLOPS, 1)
  # Resample until the draw is non-missing and non-negative.
  # Fixed: the original condition was `x < 0 | is.na(x)` — when x is NA the
  # first comparison yields NA and `while` errors with "missing value where
  # TRUE/FALSE needed". Test is.na() first and short-circuit with ||.
  while (is.na(sampled_fp_32_gflop) || sampled_fp_32_gflop < 0) {
    # Fixed: the original printed sampled_fp_32_gflops (the accumulator
    # vector) instead of the rejected draw.
    cat("Sampled value ", sampled_fp_32_gflop, "is negative or missing. Retrying...\n")
    sampled_fp_32_gflop <- sample(chip$FP32.GFLOPS, 1)
  }
  sampled_fp_32_gflops[i] <- sampled_fp_32_gflop
}

pnorm(sampled_fp_32_gflops[1],
      mean = sampled_fp_32_gflops[2],
      sd = sqrt(sampled_fp_32_gflops[3]))
|
||||
|
||||
# Question 7: z-scores of FP64 GFLOPS and two transformations of them.
fp64_gflops <- na.omit(GPU$FP64.GFLOPS)
mean(fp64_gflops)
var(fp64_gflops)

# Standard z-score: (x - mean) / sd.
zscore <- (fp64_gflops - mean(fp64_gflops)) / sd(fp64_gflops)

#fp64_gflops_trans <- (fp64_gflops*2 + 16)

# Linear transformation of the z-score: the scale factor is constant in x.
zscore_lin_trans <- ((1 / sd(fp64_gflops) * 2000) * fp64_gflops) -
  (mean(fp64_gflops) / sd(fp64_gflops))
# ^ THIS is the linear transformation.

# Non-linear transformation: the x^-0.7 factor makes the scale depend on x.
zscore_non_lin_trans <- ((1 / sd(fp64_gflops) * (fp64_gflops)^-0.7) * fp64_gflops) -
  (mean(fp64_gflops) / sd(fp64_gflops))

# Fixed: `col = blue` referenced an undefined object `blue` and would error;
# plot colors are given as strings (or numbers).
plot(zscore_lin_trans, zscore_non_lin_trans, col = "blue")
#plot(zscore,zscore_lin_trans)

doubled_zscore <- zscore * 2
|
||||
34
amazing.R
Normal file
34
amazing.R
Normal file
@@ -0,0 +1,34 @@
|
||||
# amazing.R — classify the numbers 1..10 as low / medium / high.

# Create vector from 1 to 10.
num <- 1:10

# Create empty vector named 'amazing' to hold the labels.
amazing <- c()

# Iterate over all positions of the vector.
# Fixed two syntax errors from the original:
#   1. the final `else if` was separated from its `if` chain by blank lines,
#      which at the top level of an R script orphans the `else` (parse error);
#   2. that branch's body was unbraced, so only cat() belonged to the
#      condition and the assignment/`next` dangled outside the chain.
# The redundant `next` statements are dropped — else-if already makes the
# branches exclusive.
for (i in seq_along(num)) {
  # If the number is between 1 and 3 (inclusive)...
  if (i >= 1 & i <= 3) {
    cat(i, "is low\n")
    # Add "low" to the vector 'amazing'.
    amazing[i] <- "low"
  } else if (i > 3 & i <= 6) {
    # Between 4 and 6...
    cat(i, "is medium\n")
    amazing[i] <- "medium"
  } else if (i > 6 & i <= 10) {
    # Between 7 and 10...
    cat(i, "is high\n")
    amazing[i] <- "high"
  }
}

amazing

# NOTE(review): `sleep` is presumably base R's 20-row sleep dataset, so the
# 10 labels recycle twice down the column — confirm intent.
sleep["Amazing"] <- amazing
|
||||
20
flip.R
Normal file
20
flip.R
Normal file
@@ -0,0 +1,20 @@
|
||||
# flip.R — simulate biased coin flips and plot the running relative
# frequency of heads.

count <- 100

# 100 flips, weighted 75% head / 25% tail. (TRUE spelled out: T is
# reassignable and unsafe as a literal.)
flip <- sample(c("head", "tail"), count, replace = TRUE, prob = c(0.75, 0.25))
table(flip)

is_head <- flip == "head"
table(is_head)

# Each TRUE counts as 1, and FALSE as 0; thus the sum is the number of heads.
sum(is_head)

# Running count of heads after each flip: freq[k] == sum(is_head[1:k]).
# Fixed idiom: preallocate with numeric(count) instead of seeding with 1:n.
freq <- numeric(count)
for (result in seq_along(is_head)) {
  freq[result] <- sum(is_head[1:result])
}
# (Equivalent one-liner, per the note below: freq <- cumsum(is_head))

# Relative frequency of heads after each flip.
rel_freq <- freq / seq_along(is_head)
plot(rel_freq)

#####
# Squares of 1..count.
a <- rep(0, count)
for (i in 1:count) {
  a[i] <- i^2
}
#SEE cumsum
|
||||
86
loss.R
Normal file
86
loss.R
Normal file
@@ -0,0 +1,86 @@
|
||||
# loss.R — compare the mode, median and mean of a salary vector under three
# loss functions (0-1 loss, absolute error, squared error).

# Install the datawizard package on demand (provides distribution_mode()).
# Fixed: the original used `if (!require(datawizard))`, which attaches the
# package as a side effect of the test; requireNamespace() is the
# recommended probe for availability.
if (!requireNamespace("datawizard", quietly = TRUE)) {
  install.packages("datawizard")
}
library(datawizard)

# Define the Salaries vector.
Salaries <- c(12567, 15400, 11345, 13130, 12567, 12812, 14908)

# Calculate the mode (most frequent value).
modeS <- distribution_mode(Salaries)
modeS

# Calculate the median. (Fixed typo: the variable was spelled `meadianS`.)
medianS <- median(Salaries)
medianS

# Calculate the mean.
meanS <- mean(Salaries)
meanS

# The first loss function (0-1 loss) ---------------------
## For each value in "Salaries", check whether it is:
#+ 1. different from the mode
#+ 2. different from the median
#+ 3. different from the mean
#+ ...and sum up the TRUE results.

# How many people make something other than the mode?
sum(Salaries != modeS)
# How many people make something other than the median?
sum(Salaries != medianS)
# Etc.
sum(Salaries != meanS)

# Second loss function (absolute error) ----------------------------
sum(abs(Salaries - modeS))
# ^ sum of the absolute value of each salary minus the modal salary
sum(abs(Salaries - medianS))
sum(abs(Salaries - meanS))

# Third loss function (squared error) ----------------------------
sum((Salaries - modeS)^2)
sum((Salaries - medianS)^2)
sum((Salaries - meanS)^2)

#.......................................... Exercise 2
# Create a vector of 20 uniform random numbers in [1, 20].
potato <- runif(n = 20, min = 1, max = 20)

# Calculate the mean, median and mode of the vector you created.
potato_mean <- mean(potato)
potato_mean
potato_median <- median(potato)
potato_median
potato_mode <- distribution_mode(potato)
potato_mode

# Print results.
sprintf("Mean %s Median %s Mode %s", potato_mean, potato_median, potato_mode)

# Calculate the error for each loss function for each of the measures
# (mean, median and mode).

# 1st loss function (0-1 loss, vs the mode)
sum(potato != potato_mode)

# 2nd loss function (absolute error, vs the median)
sum(abs(potato - potato_median))

# 3rd loss function (squared error, vs the mean)
sum((potato - potato_mean)^2)
|
||||
23
mean.R
Normal file
23
mean.R
Normal file
@@ -0,0 +1,23 @@
|
||||
# mean.R — compute the median of a random vector by hand, handling even and
# odd lengths separately.

# Two vectors of standard-normal draws: one with an even, one with an odd
# number of elements.
bunch_of_nums_even <- rnorm(100)
bunch_of_nums_odd <- rnorm(101)

# Sort one of the vectors by value.
sorted_nums <- sort(bunch_of_nums_even) #< even used for this example - switch to odd to test that path as well.
num_length <- length(sorted_nums)

# If the length of the sorted vector is divisible by 2...
if ((num_length %% 2) == 0) {
  print("Vector is even")
  # ...take the indices of the two middle elements...
  num_medians <- c(num_length / 2, (num_length / 2) + 1)
  # ...and average them; that average is the median. (Fixed comment: the
  # original said "that is the mean".)
  num_median <- mean(sorted_nums[num_medians])
} else {
  # Odd length...
  print("Vector is odd")
  # The median index is (n + 1) / 2. (Fixed comment: the original described
  # "subtracting 1 ... and add one again", which is not what the code does.)
  num_median_index <- (num_length + 1) / 2
  # Pull the median from the sorted vector at that index.
  num_median <- sorted_nums[num_median_index]
}

cat("Median is:", num_median)
|
||||
26
merde_1.R
Normal file
26
merde_1.R
Normal file
@@ -0,0 +1,26 @@
|
||||
# merde_1.R — confidence-interval exercises (Questions 4-6).

# Question 4: 95% confidence interval for a mean.
mean_1 <- 50
n_1 <- 36
std_1 <- 42
#a_1 = 0.05 NOPE
a_1 <- 1.96 # NOTE: despite the name, this is the z critical value for 95%, not alpha

# CI bounds: mean +/- z * standard error, where standard error = sd / sqrt(n).
lower_1 <- mean_1 - (std_1 / sqrt(n_1)) * a_1
upper_1 <- mean_1 + (std_1 / sqrt(n_1)) * a_1

cat(lower_1,"-",mean_1,"",upper_1)

# Question 5
## Higher confidence -> lower accuracy -> less accurate estimate -> greater range
## ~~Bigger n -> greater range -> greater standard deviation -> ?~~ NOPE
## Bigger n -> smaller standard error (std_1/sqrt(n_1)) -> smaller confidence range

# Question 6: required sample size for a margin of error of 10.
std_2 <- 15
e_2 <- 10 #?
# 10 = 15/sqrt(n) -> sqrt(n) = 15/10 -> n_2 = 1.5 #nope
#1.96*stderr_2 = e_2 (10)
#stderr_2 = 5.102041
#n_2: 5.102041 = 15/sqrt(n)
#15/5.102041 = sqrt(n)
# 10 = 1.96*(15/sqrt(n))
|
||||
|
||||
31
merde_2.R
Normal file
31
merde_2.R
Normal file
@@ -0,0 +1,31 @@
|
||||
# merde_2.R — hypothesis-testing scratch work (proportion test, then a
# one-sample setup that looks like it is heading toward a t test).

n <- 36
x_init <- 0.46 #< proportion - p - parameter
x_finit <- 0.38

#---Step 1 - define the hypothesis
#H0: p = 0.46
#H1: p != 0.46

#---Step 2 - define the assumptions and distribution shape
#1. Assume normal distribution
#2. Assume random, unbiased sampling
#3. p' = p = 0.46 - H0 is assumed true
#4. stderr of p' = sqrt((q*p)/n)

#---Step 3 - determine where a (alpha) is, define criterion
a <- 0.02 #0.04/2 Assumption: H1 (double tail)
Zc <- 2.05 #< given
#stderr=sqrt(((0.46-0.38)^2)/35) #<what is this misery?
#^this is nonsense

#---Step 4 - ???
#stderr=sqrt(((0.46-0.38)^2)/35) #<Is it this thing? is it this bastard?
#Zp'^
# Incomplete: this still needs dividing by stderr(p') to be a z statistic.
Zptag <- (0.38 - 0.46) #/@p'

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
a <- 0.05
x <- 5
s <- 1.5
n <- 15
# Fixed: the original line read `s2<s`, which is a comparison against an
# undefined `s2` and errors at runtime; an assignment appears to be the
# intent — TODO confirm.
s2 <- s
#Is s' the same as s in this case?
#s' pushes upwards - this is a rear tail - possibly different result! (?)
#What on earth is a t test?
|
||||
Reference in New Issue
Block a user