# Pipeline started at 06-11 17:29:51

ln -sf /project/shefflab/data//sra_fastq/SRR1552484.fastq.gz /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/K562_GRO-seq.fastq.gz
pigz -f -p 12 -d -c /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/K562_GRO-seq.fastq.gz > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1.fastq
cutadapt --version
(cutadapt -j 12 -m 2 -O 1 -a TGGAATTCTCGGGTGCCAAGG /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1.fastq -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_noadap.fastq --too-short-output /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_short.fastq ) > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt
seqtk trimfq -b 0 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_noadap.fastq | seqtk seq -L 2 - > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_processed.fastq
grep 'Reads with adapters:' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{print $(NF-1)}'
grep 'Total basepairs processed:' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{print $(NF-1)}'
awk '{sum+=$1*$2} END {printf "%.0f", sum}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt
wc -l /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_short.fastq | awk '{print $1}'
echo '### Calculate the number of trimmed reads'
fastqc --noextract --outdir /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastqc /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_processed.fastq
touch /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/processed_R1.flag
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R cutadapt -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt
awk '/count/,0' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk 'NR>2 {print prev} {prev=$0}' | awk '{if ($3/$2 < 0.01) print $1, $2}' | awk 'BEGIN{max=   0; max_len=0; len=0}{if ($2>0+max) {max=$2; len=$1}; max_len=$1} END{print max_len-len}'
awk 'NR>2 {print prev} {prev=$0}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{ if ($1 == 10) {status = 1}} END {if (status) {print status} else {print 0}}'
awk 'NR>2 {print prev} {prev=$0}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{ if ($1 == 20) {status = 1}} END {if (status) {print status} else {print 0}}'
awk 'NR>2 {print prev} {prev=$0}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{ if ($1 == 30) {status = 1}} END {if (status) {print status} else {print 0}}'
awk 'NR>2 {print prev} {prev=$0}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk '{ if ($1 == 40) {status = 1}} END {if (status) {print status} else {print 0}}'
awk '/count/,0' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/cutadapt/K562_GRO-seq_R1_cutadapt.txt | awk 'NR>2 {print prev} {prev=$0}' | awk '{if ($3/$2 < 0.01) print $1, $2}' | awk '{a[NR]=$1; b[NR]=$2; max_len=$1}{if ($1 > max_len) {max_len=$1}} END{ for (i in a) print 1+max_len-a[i], b[i]}' | sort -nk1 | awk '($1 <= 20 && $1 >= 10){degradedSum += $2}; ($1 >= 30 && $1 <= 40){intactSum += $2} END {if (intactSum < 1) {intactSum = 1} print degradedSum/intactSum}'
(bowtie2 -p 12 -k 1 -D 20 -R 3 -N 1 -L 20 -i S,1,0.50 -x /project/shefflab/genomes/human_rDNA/bowtie2_index/default/human_rDNA --rg-id K562_GRO-seq -U /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/fastq/K562_GRO-seq_R1_processed.fastq --un /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/prealignments/K562_GRO-seq_human_rDNA_unmap.fq 2>&1 > /dev/null)
bowtie2 -p 12 --very-sensitive --rg-id K562_GRO-seq -x /project/shefflab/genomes/hg38/bowtie2_index/default/hg38 -U /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/prealignments/K562_GRO-seq_human_rDNA_unmap.fq | samtools view -bS - -@ 1  | samtools sort - -@ 1 -T /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/tmpuee4zdhj -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam
samtools view -q 10 -b -@ 12 -U /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_fail_qc.bam /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam
samtools depth -b /project/shefflab/genomes/hg38/refgene_anno/default/hg38_pre-mRNA.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam | awk '{counter++;sum+=$3}END{print sum/counter}'
pigz -f -p 12 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/prealignments/K562_GRO-seq_human_rDNA_unmap.fq
samtools index /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam
samtools idxstats /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam | grep -we 'chrM' -we 'chrMT' -we 'M' -we 'MT' -we 'rCRSd' -we 'rCRSd_3k'| cut -f 3
samtools index /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam
samtools idxstats /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam | cut -f 1-2 | awk '{print $1, 0, $2}' | grep -vwe 'chrM' -vwe 'chrMT' -vwe 'M' -vwe 'MT' -vwe 'rCRSd' -vwe 'rCRSd_3k' > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/chr_sizes.bed
samtools view -L /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/chr_sizes.bed -b -@ 12 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_noMT.bam
mv /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_noMT.bam /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam
samtools index /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam
samtools stats /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam | grep '^SN' | cut -f 2- | grep 'maximum length:' | cut -f 2-
awk '{sum+=$2} END {printf "%.0f", sum}' /project/shefflab/genomes/hg38/fasta/default/hg38.chrom.sizes
/scratch/jps3dp/tools/databio//peppro/tools/bamQC.py --silent -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -c 12 -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_bamQC.tsv
awk '{ for (i=1; i<=NF; ++i) { if ($i ~ "NRF") c=i } getline; print $c }' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_bamQC.tsv
awk '{ for (i=1; i<=NF; ++i) { if ($i ~ "PBC1") c=i } getline; print $c }' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_bamQC.tsv
awk '{ for (i=1; i<=NF; ++i) { if ($i ~ "PBC2") c=i } getline; print $c }' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_bamQC.tsv
samtools view -b -@ 12 -f 4  /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_unmap.bam
samtools view -c -f 4 -@ 12 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_temp.bam
samtools view -bh -F 20 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam
samtools view -bh -f 16 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam
sed -n -e '/[[:space:]]+/w /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/plus_TSS.tsv' -e '/[[:space:]]-/w /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/minus_TSS.tsv' /project/shefflab/genomes/hg38/refgene_anno/default/hg38_TSS.bed
/scratch/jps3dp/tools/databio//peppro/tools/pyTssEnrichment.py -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/plus_TSS.tsv -p ends -c 12 -z -v -s 6 -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_plus_TssEnrichment.txt
/scratch/jps3dp/tools/databio//peppro/tools/pyTssEnrichment.py -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/minus_TSS.tsv -p ends -c 12 -z -v -s 6 -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_minus_TssEnrichment.txt
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R tss -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_plus_TssEnrichment.txt /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_minus_TssEnrichment.txt
samtools view -H /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam | grep 'SN:' | awk -F':' '{print $2,$3}' | awk -F' ' -v OFS='	' '{print $1,$3}' > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt
grep -wf /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt /project/shefflab/genomes/hg38/ensembl_gtf/default/hg38_ensembl_TSS.bed | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_ensembl_tss.bed
grep -wf /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt /project/shefflab/genomes/hg38/ensembl_gtf/default/hg38_ensembl_gene_body.bed | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_ensembl_gene_body.bed
bedtools coverage -sorted -counts -s -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_ensembl_tss.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | awk '$7>0' | sort -k4,4 -k7,7nr | sort -k4,4 -u > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_TSS_density.bed
bedtools coverage -sorted -counts -s -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_ensembl_gene_body.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | awk '$7>0' | sort -k4 > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_gene_body_density.bed
join --nocheck-order -j4 -o 1.1 1.2 1.3 1.4 1.6 1.7 2.2 2.3 2.7 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_TSS_density.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_gene_body_density.bed | awk -v OFS='	' '{ if ($5 == ){print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($8-$2)^2))^2),($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5} else {print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($3-$7)^2))^2),($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5}}' | env LC_COLLATE=C sort -k1,1 -k2,2n > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmpxwvopkcb
# Pipeline started at 06-11 19:10:14

join --nocheck-order -j4 -o 1.1 1.2 1.3 1.4 1.6 1.7 2.2 2.3 2.7 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_TSS_density.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_gene_body_density.bed | awk -v OFS='	' '{ if ($5 == "+"){print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($8-$2)^2))^2), ($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5} else {print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($3-$7)^2))^2),($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5}}' | env LC_COLLATE=C sort -k1,1 -k2,2n > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmpnu6wpr_c
awk '{print $5}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmpnu6wpr_c | sort -n | awk 'BEGIN{i=0} {s[i]=$1; i++;} END{print s[int(NR*0.5-0.5)]}
# Pipeline started at 06-14 21:14:01

join --nocheck-order -j4 -o 1.1 1.2 1.3 1.4 1.6 1.7 2.2 2.3 2.7 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_TSS_density.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_gene_body_density.bed | awk -v OFS='	' '{ if ($5 == "+"){print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($8-$2)^2))^2), ($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5} else {print $1, $2, $8, $4, sqrt((($6+$9)/sqrt(($3-$7)^2))^2),($6/sqrt(($3-$2)^2))/($9/sqrt(($8-$7)^2)), $5}}' | env LC_COLLATE=C sort -k1,1 -k2,2n > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmp4yy_drxc
awk '{print $5}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmp4yy_drxc | sort -n | awk 'BEGIN{i=0} {s[i]=$1; i++;} END{print s[int(NR*0.5-0.5)]}'
awk -v OFS='	' '{ if ($5 > 0.0172824) {print $1, $2, $3, $4, $6, $7}}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/tmp4yy_drxc > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_pause_index.bed
sort -k5,5n /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_pause_index.bed | awk ' { a[i++]=$5; } END { x=int((i+1)/2); if (x < (i+1)/2) print (a[x-1]+a[x])/2; else print a[x-1]; }'
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R pi --annotate -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_pause_index.bed
pigz -f -p 12 -f /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_pause_index.bed
samtools view -@ 4 -c -L /project/shefflab/genomes/hg38/refgene_anno/default/hg38_pre-mRNA.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam
samtools view -@ 4 -c -L /project/shefflab/genomes/hg38/refgene_anno/default/hg38_pre-mRNA.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam
grep -wf /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt /project/shefflab/genomes/hg38/refgene_anno/default/hg38_pre-mRNA.bed | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_gene_sort.bed
bedtools coverage -sorted -counts -s -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_gene_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/signal_hg38/K562_GRO-seq_gene_coverage.bed
ln -sf /project/shefflab/genomes/hg38/feat_annotation/default/hg38_annotations.bed.gz /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/hg38_annotations.bed.gz
pigz -f -p 12 -d -c /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/hg38_annotations.bed.gz > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/hg38_annotations.bed
cut -f 4 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/hg38_annotations.bed | sort -u
awk -F'	' '{print>"/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/"$4}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/raw/hg38_annotations.bed
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Enhancer | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Enhancer_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Enhancer_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Enhancer_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Enhancer_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Enhancer_minus_coverage.bed
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_minus_coverage.bed
mv "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter Flanking Region" "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_Flanking_Region"
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_Flanking_Region | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_Flanking_Region_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_Flanking_Region_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_Flanking_Region_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Promoter_Flanking_Region_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_Flanking_Region_minus_coverage.bed
mv "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5' UTR" "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5_UTR"
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5_UTR | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5_UTR_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5_UTR_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_5_UTR_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/5_UTR_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_5_UTR_minus_coverage.bed
mv "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3' UTR" "/project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3_UTR"
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3_UTR | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3_UTR_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3_UTR_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_3_UTR_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/3_UTR_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_3_UTR_minus_coverage.bed
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Exon | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Exon_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Exon_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Exon_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Exon_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Exon_minus_coverage.bed
cut -f 1 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | grep -wf - /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Intron | cut -f 1-3 | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Intron_sort.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Intron_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Intron_plus_coverage.bed
bedtools coverage -sorted  -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/Intron_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Intron_minus_coverage.bed
samtools view -@ 12 -q 10 -c -F4 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R frif -s K562_GRO-seq -z 3099922541 -n 9058650 -y cfrif --reads -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_cFRiF.pdf --bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Enhancer_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_Flanking_Region_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_5_UTR_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_3_UTR_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Exon_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Intron_plus_coverage.bed
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R frif -s K562_GRO-seq -z 3099922541 -n 9058650 -y frif --reads -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_FRiF.pdf --bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Enhancer_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Promoter_Flanking_Region_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_5_UTR_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_3_UTR_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Exon_plus_coverage.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_Intron_plus_coverage.bed
grep -wf /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt /project/shefflab/genomes/hg38/refgene_anno/default/hg38_exons.bed | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_exons_sort.bed
grep -wf /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_keep.txt /project/shefflab/genomes/hg38/refgene_anno/default/hg38_introns.bed | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt | bedtools sort -i stdin -faidx /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_introns_sort.bed
bedtools coverage -sorted -counts -s -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_exons_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exons_coverage.bed
bedtools coverage -sorted -counts -s -a /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/hg38_introns_sort.bed -b /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_sort.bam -g /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/chr_order.txt > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_introns_coverage.bed
awk -v OFS='	' '{chrom[$4] = $1; if($4!=prev4) {chromStart[$4] = $2} strand[$4] = $6; readCount[$4] += $7; exonCount[$4] += 1; geneSizeKB[$4] += (sqrt(($3-$2+0.00000001)^2)/1000); gene[$4] = $4; chromEnd[$4]=$3; prev4=$4} END { for (a in readCount) { print chrom[a], chromStart[a], chromEnd[a], gene[a], (readCount[a]/18.431408)/geneSizeKB[a], strand[a]}}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exons_coverage.bed | awk '$5>0' | sort -k4 > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exons_rpkm.bed
awk -v OFS='	' '{chrom[$4] = $1; if($4!=prev4) {chromStart[$4] = $2} strand[$4] = $6; readCount[$4] += $7; exonCount[$4] += 1; geneSizeKB[$4] += (sqrt(($3-$2+0.00000001)^2)/1000); gene[$4] = $4; chromEnd[$4]=$3; prev4=$4} END { for (a in readCount) { print chrom[a], chromStart[a], chromEnd[a], gene[a], (readCount[a]/18.431408)/geneSizeKB[a], strand[a]}}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_introns_coverage.bed | awk '$5>0' | sort -k4 > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_introns_rpkm.bed
join --nocheck-order -a1 -a2 -j4 /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_introns_rpkm.bed /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exons_rpkm.bed | awk -v OFS='	' 'NF==11 {print $7, $8, $9, $1, ($10/$5), $11}' | sort -k1,1 -k2,2n > /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exon_intron_ratios.bed
awk '{print $5}' /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exon_intron_ratios.bed | sort -n | awk ' { a[i++]=$1; } END { x=int((i+1)/2); if (x < (i+1)/2) print (a[x-1]+a[x])/2; else print a[x-1]; }'
Rscript /scratch/jps3dp/tools/databio//peppro/tools/PEPPRO.R mrna -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exon_intron_ratios.bed --annotate
pigz -f -p 12 -f /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/QC_hg38/K562_GRO-seq_exon_intron_ratios.bed
samtools index /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam
/scratch/jps3dp/tools/databio//peppro/tools/bamSitesToWig.py -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_plus.bam -c /project/shefflab/genomes/hg38/fasta/default/hg38.chrom.sizes -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/signal_hg38/K562_GRO-seq_plus_exact_body_0-mer.bw -w /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/signal_hg38/K562_GRO-seq_plus_smooth_body_0-mer.bw -p 8 --variable-step --scale 18431408.0
samtools index /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam
/scratch/jps3dp/tools/databio//peppro/tools/bamSitesToWig.py -i /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/aligned_hg38/K562_GRO-seq_minus.bam -c /project/shefflab/genomes/hg38/fasta/default/hg38.chrom.sizes -o /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/signal_hg38/K562_GRO-seq_minus_exact_body_0-mer.bw -w /project/shefflab/processed/peppro/paper/6.11.2020/results_pipeline/K562_GRO-seq/signal_hg38/K562_GRO-seq_minus_smooth_body_0-mer.bw -p 8 --variable-step --scale 18431408.0
