# Configuration - this will not appear in the slideshow
# NOTE(review): bash expands aliases only in interactive shells unless
# `shopt -s expand_aliases` is set - confirm how this file is replayed.
alias grep='grep --color'

ls
# Contents of the PEB workshop directory
ls -l
head start_here.txt

head data/exercise1_grep.txt
grep start data/exercise1_grep.txt
grep help data/exercise1_grep.txt

# If we search for "ignorecase" without any option, we only get some of the lines.
# Notice that the cow is not displayed properly :-)
grep ignorecase data/exercise1_grep.txt

# The -i option makes the search case-insensitive.
# As you can see, some lines contain upper-case characters:
grep -i ignorecase data/exercise1_grep.txt

# To solve the exercise, we also have to count the number of output lines.
# This can be done with the "-c" option:
grep -i -c ignorecase data/exercise1_grep.txt

# Solution: how to find the instructions for the next exercise
grep 21 data/exercise1_grep.txt

# Solution: you can use the "*" character to specify multiple files:
grep 'regex' data/multiplefiles/*

head data/genes/mgat_genes.gb
grep ORGANISM data/genes/mgat_genes.gb | grep 'Homo sapiens'

# Solution: grep for "bos taurus":
grep ORGANISM data/genes/mgat_genes.gb | grep taurus

head data/genes/sequences.fasta
grep 'AAA..TTT' data/genes/sequences.fasta
# -B1 also prints the line before each match.
grep -B1 'AAA..TTT' data/genes/sequences.fasta

# Bonus: pipe an additional grep '>' to see a cow:
grep -B1 'AAA..TTT' data/genes/sequences.fasta | grep '>'

head data/genes/chr8.gff
# NOTE(review): awk splits fields on any whitespace by default, so $9 is only
# the full ninth column if that column contains no spaces - TODO confirm
# against the data, otherwise pass -F'\t'.
awk '$1=="chr8" && $4>100000 && $5<200000 ' data/genes/chr8.gff
awk '$4 > 5000000 && $5 < 10000000 ' data/genes/chr8.gff

# Print field 1 and the difference between fields 5 and 4.
awk '{print $1, $5-$4}' data/genes/chr8.gff | head
# Same, plus field 9; grep -v drops the lines starting with "#".
awk '{print $1, $5-$4, $9}' data/genes/chr8.gff | grep -v '^#' | head
# Split the result on ";" and keep only the first two pieces.
awk '{print $1, $5-$4, $9}' data/genes/chr8.gff | grep -v '^#' | awk -F';' '{print $1, $2}' | head

# Select records by a regular expression applied to field 9.
awk '$9 ~ /symbol=MIR/ {print $0}' data/genes/chr8.gff
awk '$9 ~ /POU5F1B/ {print $5-$4}' data/genes/chr8.gff
awk '$9 ~ /gene_id=1584/ {print $0}' data/genes/chr8.gff

head Makefile
make testrule
make cow