Friday, August 7, 2009

EM - algorithm: LD


### LD
library(genetics)
fms<-read.delim(file="http://people.umass.edu/foulkes/asg/data/FMS_data.txt",header=T,sep="\t")
attach(fms)

Actn3Snp1<-genotype(actn3_r577x,sep="")
Actn3Snp2<-genotype(actn3_rs540874,sep="")
Actn3Snp3 <- genotype(actn3_rs1815739,sep="")
Actn3Snp4 <- genotype(actn3_1671064,sep="")

LD(Actn3Snp2,Actn3Snp3)

tt<-table(Actn3Snp2,Actn3Snp3)
temp<-tt[1,]
tt[1,]<-tt[3,];tt[3,]<-temp

pAB<-0.25;paB<-0.25;pAb<-0.25;pab<-0.25

N<-sum(tt)
pAB<-(2*tt[1,1]+tt[1,2]+tt[2,1]+tt[2,2]*(pAB*pab)/(pAB*pab+paB*pAb))/N/2
pAb<-(2*tt[1,3]+tt[1,2]+tt[2,3]+tt[2,2]*(paB*pAb)/(pAB*pab+paB*pAb))/N/2
paB<-(2*tt[3,1]+tt[3,2]+tt[2,1]+tt[2,2]*(paB*pAb)/(pAB*pab+paB*pAb))/N/2
pab<-(2*tt[3,3]+tt[3,2]+tt[2,3]+tt[2,2]*(pAB*pab)/(pAB*pab+paB*pAb))/N/2
cbind(pAB,pAb,paB,pab); pAB+pAb+paB+pab;
2*tt[1,1]+tt[1,2]+tt[2,1]+tt[2,2]*(pAB*pab)/(pAB*pab+paB*pAb)+
2*tt[1,3]+tt[1,2]+tt[2,3]+tt[2,2]*(paB*pAb)/(pAB*pab+paB*pAb)+
2*tt[3,1]+tt[3,2]+tt[2,1]+tt[2,2]*(paB*pAb)/(pAB*pab+paB*pAb)+
2*tt[3,3]+tt[3,2]+tt[2,3]+tt[2,2]*(pAB*pab)/(pAB*pab+paB*pAb)
tt[1,1]<-pAB^2
tt[1,2]<-2*pAB*pAb
tt[1,3]<-pAb^2
tt[2,1]<-2*pAB*paB
tt[2,2]<-2*pAB*pab+2*paB*pAb
tt[2,3]<-2*pAb*pab
tt[3,1]<-paB^2
tt[3,2]<-2*pab*paB
tt[3,3]<-pab^2
tt<-tt*N;tt

row<-apply(tt,1,sum); col<-apply(tt,2,sum)

pA<-(2*row[1]+row[2])/sum(row)/2;
pB<-(2*col[1]+col[2])/sum(col)/2;

pAB-pA*pB

EM - algorithm: ABO


### ABO
Na<-25; Nab<-50; Nb<-25; No<-15; N<-Na+Nb+No+Nab

pa<-0.33; pb<-0.33; po<-0.34

for(i in 1:20){
Naa<-pa^2/(pa^2+2*pa*po)*Na
Nao<-2*pa*po/(pa^2+2*pa*po)*Na
Nbb<-pb^2/(pb^2+2*pb*po)*Nb
Nbo<-2*pb*po/(pb^2+2*pb*po)*Nb
#Nab<-2*pa*pb*Nab
#Noo<-po^2*No
pa<-(2*Naa+Nab+Nao)/(Naa+Nao+Nbb+Nbo+Nab+No)/2;
pb<-(2*Nbb+Nab+Nbo)/(Naa+Nao+Nbb+Nbo+Nab+No)/2;
po<-(2*No+Nao+Nbo)/(Naa+Nao+Nbb+Nbo+Nab+No)/2;
}

cbind(Naa,Nao,Nbb,Nbo,Nab,No)
cbind(pa,pb,po)

Saturday, October 27, 2007

SNPs(Single nucleotide polymorphisms)

# more than 99% of human DNA sequences are the same across the population

# it must occur in at least 1% of the population.

# SNPs, which make up about 90% of all human genetic variation, occur every 100 to 300 bases along the 3-billion-base human genome

# two of every three SNPs involve the replacement of cytosine (C) with thymine (T)

# SNPs can occur in both coding (gene) and noncoding regions of the genome.

# Many SNPs have no effect on cell function, but scientists believe others could predispose people to disease or influence their response to a drug

Normalization

# Background Correction (Oligonucleotide arrays)
- The array is split into 16 rectangular zones
- Zone background is chosen to be the lowest 2% of intensities in each zone
- The background for each of the probes is computed as weighted sum of backgrounds of all zones.
- The corrected probe balues can be calculated by subtracting the background

# Normalization is necessary because the raw intensities of labeled targets vary among arrays due to sources of experimental variability independent of level of expression

Twin Study

C: the number of concordant pairs
D: the number of discordant pairs

Pairwise concordance = C/(C+D)
Probandwise concordance = 2C/(2C+D)

Pedigree Analysis

library(kinship)

# generate an example data
id <- 1:14
dadid <- c(NA, NA, 1, 1, 1, 3, 5, NA, NA, 8, 8, NA, NA, 11)
momid <- c(NA, NA, 2, 2, 2, 12, 13, NA, NA, 9, 9, NA, NA, 4)
sex <- c(1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1)
affected <-c(1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, 1)
status <-c(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0)

# make it a data frame
ped <- data.frame(id, dadid, momid, sex, affected, status)

# pedigree analysis
pp<-pedigree(id=ped$id,dadid=ped$dadid,momid=ped$momid,sex=ped$sex,affected=ped$affected,status=ped$status)

# pedigree plot
plot(pp)

Microarray in general

### cDNA microarray - 2 color

Cy5: Red (experimental, mutant)
Cy3: Green (reference, wild-type)

M = log2(R/G)
A = {log2(R) + log2(G)}/2

### "Long" oligo arrays
- two color and double-stranded
- 60-80 bp long

### Affymetrix arrays
- one color array and single stranded
- 25 bp

# probe: material that is purposedfully places on the array before the experiment
# target: the material that is gathered from a sample
# hybridization: target material is put on array, then targets stick to complementary probes