!"#$" "& "'() *"$#+, "& -)" *.--)*"+&,* !"#$ !()/+)*0 1),-.) 2+$.* -&(&)* ,(( 3456478 *"$#+,* *)9)/"):; 38 <'()* = 64>> /&?(9)") -),&?)*; <'()0 1),-.) 2+$.* "'() @ -&(&)* ,(( 345@ABB *"$#+,* *)9)/"):; 3@ABB !"$#+,* = @AB7 /&?(9)") -),&?)*; <'()0 1),-.) 2+$.* "'() A 3457C6 *"$#+,* *)9)/"):; 37C6 !"$#+,* = 7A7 /&?(9)") -),&?)*; <'()0 1),-.) 2+$.* "'() A <D#+9#,:5@>>B@5BC 345@ *"$#+,* *)9)/"):; 3@ !"$#+, = A /&?(9)") -),&?)*; < 1 + " -./.01 2345-6.-7 18 390/5%. 39 -.,40: EF/9.:) (#$"+#99' *)G.),/): -),&?)* H.?( "& *"$#+, +, "#F&,&?'0 !"#$"0 IIII IIII 08//.01389 ;.,4 <& #:: ?&,"D "& *)#$/DJ *)) K:2#,/) !)#$/D L("+&,*0 M&,"D N#,-) -,<=/. /80,1389 !"" !#$%&'() +(#,( !)-.&""( !./0%("&( 1()-"(2$/3 1$"&4$ 1%(4&" 1%&0&/3 5&%-&) 6/"()2/ 1%.)$& 1.%7&)( 8(/, 9(#:,2&( 93&"$ 93&)( 9,",#:&( 9,#,%,/ 9,,7 6/"()2/ :8-1 -./.01389 !"" >.#() ?,/@.&0, ?,./$ A)7),B) !"#$%" '"()*+ !)#$/D T&$ 2+$.* -),&?+/ *)G.),/)* #,: $)9#"): +,T&$?#"+&,R I&. /#, *)#$/D T&$ "D) UD&9) 2+$.* T#?+9' &$ *)#$/D T&$ *()/+T+): -),.*J *()/+)* )"/R I&. /#, #9*& T+,: '&.$ *"$#+, &$ -),&?) $)/&$: +T '&. D#2) +"* +,T&$?#"+&,J *./D #* *"$#+, ,#?)J #//)**+&,R V&?) W),&?) !)#$/D !"#$%" !EKNSV 1K<K KXKYIZE [ \]!QKY]ZE ^LN_`EXSV \]NQ! OKM]Y]E! VLME !"#$ &'($)* $'+#$,'- ./ 0',"1'&2 !"#$%& ($)*"$)# 34&56(74,0 89 5'$ 5(0' : &"$+'- ;7 <5')4'& =(1': <+$(4, =(1': >',?(,@ A))'&&4", (&)',-4,067 +),-.#/ !"**)01, 233 *4 54$6)01 !"* !#7" !"#$%& +480.4#3 !"#$%" '"()*+ ,"-./0 94:$ !"."%*"3 ;*"<,= >? )*"<, ,"."%*"3 @ +","."%* 2.. <'6')+ (66 ./ 0',"1'& !*$#)0 A#<" !-"%)", A#<" !"D:"0%" E"01*& +#*" F4,* B"0C#06 F4,* (4:0*$/ G4. H/-" 3BC!99DBB8E. 
3',0#' F4$#& B B999 G,@,"H, D=IAD C(6(7&4( 0',"14) J=A 3K=LDBI?JI?M3DLB./EIB999 3',0#' F4$#& B B999 N#1(, N"1" &(54',& ?$(O46 0',"14) J=A 3K=LDBIPQI?M3DL..ERISRRR 3',0#' F4$#& B SRRR N#1(, N"1" &(54',& P"6"1;4( 0',"14) J=A 3K=LDBI=MI?M3DLB.VVIB999 3',0#' F4$#& B WXU899E9 S9/BR B999 N#1(, N"1" &(54',& =4)($(0#( 0',"14) J=A 3K=LDBI=MI?M3DLB.VEIB999 3',0#' F4$#& B WXU899ES S9ER9 B999 N#1(, N"1" &(54',& =4)($(0#( 0',"14) J=A I:0 20#./,), N"1' >',"1' <'($)* J'+& ;3"0*)J/ !)<).#$ !"D:"0%", KCE2!HL 20#./M" !"D:"0%" N#$)#*)40 K!AOL 2.)10 !"D:"0%", KG!2L G"*#3#*#P3$)7"0 (4<-#$#*)7" 20#./,), H44. B"0"$#*" O&/.41"0"*)% H$"" <KAJPN 3AYA A=AZ![K \ LM<GAZM[K ]QJ^?K=PN LMJG< WACMZMK< NQCK !" ,$7+,D/ R$,$ 0,$U"+*/.E /$.$7#$D C,+T /$-,7A ,$/*.#/ #$%&' )*+&*$,*) - ./012' /. )*+&*$,*) %0/3#4*4 - 86-."26$D ZHL)H H."26$D ZHL)H F$V*/ !.*/#-. ! """ $%&' ()'%*+,- """ )*+&*$,* '5%* - F*7.$+#"D$ $&16*0 /. 70/&%) , 328&* '90*)9/84 )A$ ![U-.*$ #A,$/A+.D "/ */$D -/ #A$ :;<=:>:0,+5-5."#E .$U$. C+, #A$ ."\$."A++D #A-# #A$ 0+/"#"+6 "/ D"CC$,$6# -T+62 #A$ 2,+*0/ /"T0.E 5E 7A-67$N ]6#$, #A,$/A+.D U-.*$: ?N?^ ,?@A=@>B ,CB;D )BE>B@FB 7D?>G=@H !"#$%$#$&%'()"* ,-./$'$#()" 0*$123(3 4--1 T$#-[!H)L R".. -..+R */$,/ +C #A$ U",*/[+,"$6#$D R$5/"#$/ #+ $-/".E 0$,C+,T #A$", +R6 7*/#+T"_$D 7+T0-,-#"U$ 2$6+T"7/ -6-.E/$/ "6 -6 -*#+T-#$D C-/A"+6 R"#A T"6"T-. T-6*-. T-6"0*.-#"+6N 8/"62 T$#-[!H)L= */$, 7-6 0$,C+,T - /#-#"/#"7-. -6-.E/"/ +6 /$3*$67$/ -//"26$D #+ D"CC$,$6# 2,+*0/ "6 +,D$, #+ D$#$,T"6$ ,$/"D*$/ #A-# /"26"C"7-6#.E 7+,,$.-#$ R"#A +6$ +, T+,$ T$#-D-#- C"$.D/N !"#$% '( )*#$+,*- ./0 1 +$23,+$4 5,$64 J+T$ S$6+T$ L$-,7A ($/*.#/ W$#-D-#-[D,"U$6 !+T0-,-#"U$ H6-.E/"/ )++. C+, L$3*$67$/ IT$#-[!H)LM !"#$%$%&%'#( L]H(!J 9H)H HFHX`Y] a &GL8HXGY] 4;(b1]F!J &G(8L ZHWGXG]L J;W] ViPR is funded by the National Institute of Allergy and Infectious Diseases (NIH / DHHS) under Contract No. 
HHSN272200900041C and is a collaboration between Northrop Grumman Health IT, University of Texas Southwestern Medical Center and Vecna Technologies. Comments, questions, suggestions? Contact us at [email protected] What is Meta-CATS? A unique comparative genomics analysis tool in ViPR to identify nucleotide/amino acid positions that significantly differ between two or more groups of virus sequences. How does Meta-CATS work? It runs a multiple sequence alignment, a chi-square test to identify positions that significantly differ from the random distribution of residues between all metadata groups, and a Pearson's chi-square test to identify the specific pairs of groups that contribute to the observed statistical difference. How to use Meta-CATS? • Input nucleotide/amino acid sequences to Meta-CATS • Divide sequences into two or more groups based on metadata (phenotype) • Run Meta-CATS Option 1: Search for sequences and then input sequences to Meta-CATS http://www.viprbrc.org/ Freely available Integrated datasets Bioinformatics tool suite 1 2 3 Select sequences and add them to a working set for future analysis. You’ll need to register for a Workbench account to use this feature. • Select display fields • Custom-sort records Click to view details of the record 4 Let ViPR automatically group sequences by host, country, year, viral species, or virus type. On the ViPR homepage, choose a virus family to start. 1. Identify sequences to include in the Meta-CATS analysis: mouse-over the “Search Data” tab and click “Genomes” or “Genes & Proteins”. For this example, we will use genome sequences. 2. Select search criteria on the Genome Search page and click the “Search” button to run your query. 3. Select sequences from the search result page by clicking the checkboxes. Mouse-over the yellow “Run Analysis” button, and click “Metadata-driven Comparative Analysis Tool”. 
If you want to include sequences that are not in this search result, select desired sequences and click “Add to Working Set”, then add additional sequences to the same working set. Click the “Workbench” tab, find the working set you saved and click next to it. On the working set details page, mouse-over “Run Analysis” and click “Metadata-driven Comparative Analysis Tool”. 4. On the next page, choose the number of groups. If you want to group your sequences by host, country, year, viral species, or virus type, you can use ViPR’s auto grouping feature by clicking the “Auto Grouping” drop-down menu and following the prompts. Otherwise, you can group sequences manually on the next page. Adjust the p-value threshold if needed. Then click “Continue”.
This document is posted to help you gain knowledge. Please leave a comment to let me know what you think about it! Share it to your friends and learn new things together.
ViPR is funded by the National Institute of Allergy and Infectious Diseases (NIH / DHHS) under Contract No. HHSN272200900041C and is a collaboration between Northrop Grumman Health IT, University of Texas Southwestern Medical Center and Vecna Technologies. Comments, questions, suggestions? Contact us at [email protected]
What is Meta-CATS? A unique comparative genomics analysis tool in ViPR to identify nucleotide/amino acid positions that significantly differ between two or more groups of virus sequences.
How does Meta-CATS work? It runs a multiple sequence alignment, a chi-square test to identify positions that significantly differ from the random distribution of residues between all metadata groups, and a Pearson's chi-square test to identify the specific pairs of groups that contribute to the observed statistical difference.
How to use Meta-CATS?
• Input nucleotide/amino acid sequences to Meta-CATS
• Divide sequences into two or more groups based on metadata (phenotype)
• Run Meta-CATS
Option 1: Search for sequences and then input sequences to Meta-CATS
http://www.viprbrc.org/
Freely available Integrated datasets Bioinformatics tool suite
1 2
3
Select sequences and add them to a working set for future analysis.
You’ll need to register for a Workbench account to use this feature.
• Select display fields • Custom-sort records
Click to view details of the record
4
Let ViPR automatically group sequences by host, country, year, viral species, or virus type.
On the ViPR homepage, choose a virus family to start.
1. Identify sequences to include in the Meta-CATS analysis: mouse-over the “Search Data” tab and click “Genomes” or “Genes & Proteins”. For this example, we will use genome sequences.
2. Select search criteria on the Genome Search page and click the “Search” button to run your query.
3. Select sequences from the search result page by clicking the checkboxes. Mouse-over the yellow “Run Analysis” button, and click “Metadata-driven Comparative Analysis Tool”. If you want to include sequences that are not in this search result, select desired sequences and click “Add to Working Set”, then add additional sequences to the same working set. Click the “Workbench” tab, find the working set you saved and click next to it. On the working set details page, mouse-over “Run Analysis” and click “Metadata-driven Comparative Analysis Tool”.
4. On the next page, choose the number of groups. If you want to group your sequences by host, country, year, viral species, or virus type, you can use ViPR’s auto grouping feature by clicking the “Auto Grouping” drop-down menu and following the prompts. Otherwise, you can group sequences manually on the next page. Adjust the p-value threshold if needed. Then click “Continue”.
Option 2: Use a working set or upload your own sequences and then run Meta-CATS
1. Mouse-over the “Analyze & Visualize” tab and click “Metadata Sequence Analysis”.
2. On the Meta-CATS tool landing page, input your sequences using one of the three options:
2.1 Upload a sequence file and a sequence assignment file.
2.2 Paste sequences in FASTA format.
2.3 Use one or more working sets from your Workbench.
3. Choose the format of sequences provided and sequence type.
4. Choose the number of groups. Adjust the p-value threshold if needed. Then click “Continue”.
2
2.1
Three options to input sequences
2.3
Divide sequences into groups based on metadata (phenotype) and run Meta-CATS
Choose the format of sequences
http://www.viprbrc.org/
Freely available Integrated datasets Bioinformatics tool suite
ViPR is funded by the National Institute of Allergy and Infectious Diseases (NIH / DHHS) under Contract No. HHSN272200900041C and is a collaboration between Northrop Grumman Health IT, University of Texas Southwestern Medical Center and Vecna Technologies. Comments, questions, suggestions? Contact us at [email protected]
5. Divide sequences into groups based on scientific knowledge: double-click the desired sequences and then click “Add” to add them to their respective groups. When finished, click “Run”.
6. The analysis may take a few minutes to run. While the analysis is running, you can choose to save the analysis (upon completion) to your Workbench by entering a name for the analysis and then clicking the “Save to Workbench” button. Then you can move to other parts of the ViPR site, and retrieve the analysis results later from your Workbench.
7. The Meta-CATS report includes:
• Chi-square goodness-of-fit test result: positions that have significant non-random distribution between the specified groups.
• Pearson’s Chi-square Pairwise Comparison Report: positions that significantly differ between the groups.
• Save the analysis to your Workbench if needed. You can also share the analysis with collaborators.