1、背景知识

  • The usage of Alternative Transcription Start sites (aTSS可选择转录起始位点), Alternative Splicing (AS可选择剪切位点) and alternative Transcription Termination Sites (aTTS可选择终止位点) are collectively collectively results in the production of different isoforms.
A) Schematic representation of a mRNA transcript. Isoforms can be... |  Download Scientific Diagram
  • Alternative isoforms are widely used as recently demonstrated by The ENCODE Consortium, which found that on average, 6.3 different transcripts are generated per gene; a number which may vary considerably per gene.

  • 目前基因转录本水平的定量表达分析软件有:Kallisto, Salmon, RSEM or StringTie等

  • IsoformSwitchAnalyzeR包可基于上述转录本定量结果,分析同一基因在对照/实验条件下(normal/disease)是否会表达不同比例的转录本。

    LwNUYk.png
  • 如上图所示,IsoformSwitchAnalyzeR分析流程主要分为两大部分:(1) identify isoform switch[基础分析]; (2) predict potential functional consequences of the identified isoform switches[进阶分析]

2、基础分析

1
2
3
library(IsoformSwitchAnalyzeR)
library(tidyverse)
library(data.table)

2.1 初步创建switchAnalyzeRlist对象

2.1.1 导入转录本定量数据

使用的是IsoformSwitchAnalyzeR的示例数据,为salmon软件的定量结果。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#2组,各两个样本
list.files("data/salmon", 
           recursive = T, full.names = T)
# [1] "data/salmon/hESC_0/quant.sf.gz"
# [2] "data/salmon/hESC_1/quant.sf.gz"
# [3] "data/salmon/iPS_0/quant.sf.gz" 
# [4] "data/salmon/iPS_1/quant.sf.gz"

fread("data/salmon/hESC_0/quant.sf.gz") %>% head
#包含5列,分别为转录本id, 转录本长度, 有效长度,TPM标准化,原始count
#              Name Length EffectiveLength         TPM   NumReads
# 1: TCONS_00000001   1652            1652       0.000    0.00000
# 2: TCONS_00000002   1488            1488       0.000    0.00000
# 3: TCONS_00000003   1595            1595       0.000    0.00000
# 4: TCONS_00000006     78              78 1377990.000    4.67384
# 5: TCONS_00000007   2750            2750     329.487  589.15400
# 6: TCONS_00000008   4369            4369     358.067 1050.07000

#importIsoformExpression会自动识别salmon的定量结果,储存到list对象里
salmonQuant <- importIsoformExpression(
  parentDir = "data/salmon"
)
salmonQuant$abundance %>% head(3)
#       isoform_id hESC_0   hESC_1    iPS_0    iPS_1
# 1 TCONS_00000001      0 0.000000 0.000000 4.659597
# 2 TCONS_00000002      0 1.564879 5.504247 2.818824
# 3 TCONS_00000003      0 0.000000 0.000000 0.000000
salmonQuant$counts %>% head(3)
#       isoform_id hESC_0    hESC_1    iPS_0    iPS_1
# 1 TCONS_00000001      0 0.0000000  0.00000 18.13313
# 2 TCONS_00000002      0 0.1116201 21.10248 10.96964
# 3 TCONS_00000003      0 0.0000000  0.00000  0.00000

2.1.2 样本分组信息

1
2
3
4
5
6
7
8
myDesign <- data.frame(
  sampleID = c("hESC_0", "hESC_1", "iPS_0", "iPS_1"),
  condition = c("hESC", "hESC", "iPS", "iPS"))
#   sampleID condition
# 1   hESC_0      hESC
# 2   hESC_1      hESC
# 3    iPS_0       iPS
# 4    iPS_1       iPS

2.1.3 其它注释信息

(1)gtf文件: 转录本的外显子组成以及对应基因

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
fread("data/example.gtf.gz") %>% head(2)
#      V1        V2   V3    V4    V5 V6 V7 V8
# 1: chr1 cufflinks exon 11874 12227  .  +  .
# 2: chr1 cufflinks exon 11874 12227  .  +  .
#                                                        V9
# 1: transcript_id "TCONS_00000001"; gene_id "XLOC_000001";
# 2: transcript_id "TCONS_00000002"; gene_id "XLOC_000001";

fread("data/example.gtf.gz") %>% 
  dplyr::count(V3)
#      V3     n
# 1:  CDS  9612
# 2: exon 10929

(2)fasta文件:转录本的核苷酸序列

1
2
3
4
5
6
7
8
fread("data/example_isoform_nt.fasta") %>% head()
#                                                                     >TCONS_00000001
# 1: CTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTATTTGCTGTCTCTTAGCCCAGACTTCCCGTGTCCT
# 2: TTCCACCGGGCCTTTGAGAGGTCACAGGGTCTTGATGCTGTGGTCTTCATCTGCAGGTGTCTGACTTCCAGCAACTGCTG
# 3: GCCTGTGCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTGTGGAGAGGAGCCATGCCTAGAGTGGGATGGGCCAT
# 4: TGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGA
# 5: GTGAGAGCATCAACTTCTCTCACAACCTAGGCCAGTGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCTCCA
# 6: GAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTGTGAGTGTCCCCAGTGTTGCAGAGGCAGGGCCATCAGGCAC

2.1.4 创建switchAnalyzeRlist对象

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
sar1 <- importRdata(
  isoformCountMatrix   = salmonQuant$counts,
  isoformRepExpression = salmonQuant$abundance,
  designMatrix         = myDesign,
  isoformExonAnnoation = "data/example.gtf.gz",
  isoformNtFasta       = "data/example_isoform_nt.fasta"
)

sar1 %>% length()
# 11
names(sar1)
# [1] "isoformFeatures"      "exons"                "conditions"          
# [4] "designMatrix"         "sourceId"             "isoformCountMatrix"  
# [7] "isoformRepExpression" "runInfo"              "orfAnalysis"         
# [10] "isoformRepIF"         "ntSequence" 

sar1$isoformFeatures %>% head(2)
# iso_ref/gene_ref : 新的唯一编号,不重复
# gene_overall_mean/iso_overall_mean : 基因/转录本在所有样本的平均表达量
# IF_overall(isoform fraction=isoform_exp / gene_exp): 在所有样本中,特定基因表达该转录本的比例

#              iso_ref          gene_ref     isoform_id gene_id condition_1 condition_2
# 280 isoComp_00000001 geneComp_00000001 TCONS_00000316 AADACL3        hESC         iPS
# 281 isoComp_00000002 geneComp_00000001 TCONS_00000317 AADACL3        hESC         iPS
#     gene_name gene_biotype iso_biotype gene_overall_mean gene_value_1 gene_value_2
# 280   AADACL3           NA          NA          25.37693     32.66891     18.08494
# 281   AADACL3           NA          NA          25.37693     32.66891     18.08494
#     gene_stderr_1 gene_stderr_2 gene_log2_fold_change gene_q_value iso_overall_mean
# 280       10.1063      6.337321            -0.8527734           NA         3.704839
# 281       10.1063      6.337321            -0.8527734           NA        21.672089
#     iso_value_1 iso_value_2 iso_stderr_1 iso_stderr_2 iso_log2_fold_change iso_q_value
# 280    4.846923    2.562755     3.823911     2.562511           -0.9167289          NA
# 281   27.821989   15.522189     6.282387     3.774811           -0.8414829          NA
#     IF_overall   IF1     IF2      dIF isoform_switch_q_value gene_switch_q_value   PTC
# 280   0.114475 0.124 0.10495 -0.01905                     NA                  NA FALSE
# 281   0.885525 0.876 0.89505  0.01905                     NA                  NA FALSE

sar1$isoformFeatures %>% 
  dplyr::filter(gene_id=="AADACL3") %>% 
  dplyr::select(isoform_id, gene_id, dplyr::contains("IF"))
#       isoform_id gene_id IF_overall   IF1     IF2      dIF
# 1 TCONS_00000316 AADACL3   0.114475 0.124 0.10495 -0.01905
# 2 TCONS_00000317 AADACL3   0.885525 0.876 0.89505  0.0190


#ORF转录本开放阅读框注释
sar1$orfAnalysis %>% head(2)
# orfTransciptStart/orfTransciptEnd 基于转录本核苷酸序列(剪切后)的起始位置
# orfStartGenomic/orfEndGenomic 基于基因组坐标(剪切前)的起始位置
#       isoform_id orfTransciptStart orfTransciptEnd orfTransciptLength
# 1 TCONS_00000001              1402            1629                228
# 2 TCONS_00000002               317             715                399
#   orfStarExon orfEndExon orfStartGenomic orfEndGenomic
# 1           3          3           14159         14386
# 2           1          3           12190         13636
#   stopDistanceToLastJunction stopIndex   PTC orf_origin
# 1                      -1163         3 FALSE Annotation
# 2                       -231         3 FALSE Annotation

2.2 过滤

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
sar2 <- preFilter(sar1)
#默认过滤标准
# geneExpressionCutoff = 1 : 根据gene_overall_mean列过滤低表达基因
# isoformExpressionCutoff = 0 : 根据iso_overall_mean列过滤低表达基因
# IFcutoff=0.01: 根据IF_overall过滤
# dIFcutoff = 0.1: 根据dIF过滤(绝对值)
# removeSingleIsoformGenes = TRUE: 删除只有一个转录本的基因

dim(sar1)
# [1] 1060   30
dim(sar2)
# [1] 760  30
sar2
# This switchAnalyzeRlist list contains:
# 760 isoforms from 208 genes
# 1 comparison from 2 conditions (in total 4 samples)
# 
# Feature analyzed:
#   [1] "ORFs, ntSequence"

2.3 isoformSwitch分析

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
sar3 <- isoformSwitchTestDEXSeq(sar2)
sar3$isoformFeatures %>% head(2)
# 如下,主要增加了isoform_switch_q_value与gene_switch_q_value两列信息

#     iso_ref          gene_ref     isoform_id gene_id condition_1
# 557 isoComp_00000004 geneComp_00000003 TCONS_00003880   ACAP3        hESC
# 558 isoComp_00000005 geneComp_00000003 TCONS_00003881   ACAP3        hESC
#     condition_2 gene_name gene_biotype iso_biotype gene_overall_mean
# 557         iPS     ACAP3           NA          NA          910.5955
# 558         iPS     ACAP3           NA          NA          910.5955
#     gene_value_1 gene_value_2 gene_stderr_1 gene_stderr_2
# 557     1771.599     49.59168        377.02      15.68552
# 558     1771.599     49.59168        377.02      15.68552
#     gene_log2_fold_change gene_q_value iso_overall_mean iso_value_1
# 557             -5.158528           NA         241.0513    478.7646
# 558             -5.158528           NA         186.6521    365.0107
#     iso_value_2 iso_stderr_1 iso_stderr_2 iso_log2_fold_change
# 557    3.337951    175.44243     3.337951            -7.159924
# 558    8.293521     89.61604     1.997000            -5.458111
#     iso_q_value IF_overall     IF1     IF2      dIF
# 557          NA   0.156075 0.26100 0.05115 -0.20985
# 558          NA   0.188100 0.20455 0.17165 -0.03290
#     isoform_switch_q_value gene_switch_q_value   PTC
# 557              0.2601812          0.02341811 FALSE
# 558              0.7079831          0.02341811 FALSE



#如下结果gene_switch_q_value即为isoform_switch_q_value的最小值
sar3$isoformFeatures %>% 
  dplyr::filter(gene_id=="ACAP3") %>% 
  dplyr::select(isoform_id, gene_id,dIF, 
                dplyr::contains("switch"))
#       isoform_id gene_id      dIF isoform_switch_q_value gene_switch_q_value
# 1 TCONS_00003880   ACAP3 -0.20985             0.26018123          0.02341811
# 2 TCONS_00003881   ACAP3 -0.03290             0.70798307          0.02341811
# 3 TCONS_00003882   ACAP3  0.29825             0.02341811          0.02341811
# 4 TCONS_00003883   ACAP3 -0.05555             0.58385549          0.02341811

switchPlot(sar3, gene = 'ACAP3',
           condition1="hESC",
           condition2="iPS")
LwNiaw.png
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
#四个子图
switchPlotGeneExp(sar3, gene = 'ACAP3',
                  condition1="hESC",
                  condition2="iPS")
switchPlotIsoExp(sar3, gene = 'ACAP3',
                  condition1="hESC",
                  condition2="iPS")
switchPlotIsoUsage(sar3, gene = 'ACAP3',
                 condition1="hESC",
                 condition2="iPS")
switchPlotTranscript(sar3, gene = 'ACAP3',
                   condition1="hESC",
                   condition2="iPS")

2.4 isoformSwitch结果探索

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
sar3$isoformSwitchAnalysis %>% head()
#            iso_ref          gene_ref     isoform_id condition_1 condition_2      dIF       pvalue
# 1 isoComp_00000922 geneComp_00000294 TCONS_00000007        hESC         iPS  0.01615 8.645971e-01
# 2 isoComp_00000923 geneComp_00000294 TCONS_00000008        hESC         iPS -0.25885 4.859083e-02
# 3 isoComp_00000924 geneComp_00000294 TCONS_00000009        hESC         iPS  0.24275 7.547031e-05
# 4 isoComp_00000929 geneComp_00000298 TCONS_00000017        hESC         iPS  0.08785 7.690961e-01
# 5 isoComp_00000930 geneComp_00000298 TCONS_00000018        hESC         iPS -0.47220 6.113540e-14
# 6 isoComp_00000931 geneComp_00000298 TCONS_00000019        hESC         iPS  0.23965 2.119346e-03
#           padj     IF1     IF2
# 1 9.665421e-01 0.51665 0.53280
# 2 1.464787e-01 0.48320 0.22435
# 3 7.536969e-04 0.00015 0.24290
# 4 9.355274e-01 0.00000 0.08785
# 5 3.815868e-12 0.47220 0.00000
# 6 1.250306e-02 0.00000 0.23965

2.4.1 Top Switches–gene

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
sar.ge_df=extractTopSwitches( 
  sar3, 
  filterForConsequences = FALSE, 
  n = NA,                
  extractGenes = TRUE,    # when FALSE isoforms are returned
  sortByQvals = TRUE
)
dim(sar.ge_df)
# [1] 68  7

head(sar.ge_df)
#             gene_ref     gene_id gene_name condition_1 condition_2 gene_switch_q_value Rank
# 1  geneComp_00000088        ENO1      ENO1        hESC         iPS        2.429932e-90    1
# 4  geneComp_00000324 XLOC_001217      <NA>        hESC         iPS        1.464063e-24    2
# 5  geneComp_00000171        NBL1      NBL1        hESC         iPS        1.039215e-21    3
# 7  geneComp_00000053       CASP9     CASP9        hESC         iPS        5.232094e-18    4
# 10 geneComp_00000282        UBR4      UBR4        hESC         iPS        1.597415e-17    5
# 14 geneComp_00000072       CROCC     CROCC        hESC         iPS        1.752050e-17    6

2.4.2 Top Switches–isoform

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
sar.iso_df=extractTopSwitches( 
  sar3, 
  filterForConsequences = FALSE, 
  n = NA,                
  extractGenes = FALSE,    # when FALSE isoforms are returned
  sortByQvals = TRUE
)
dim(sar.iso_df)
# [1] 121  12
head(sar.iso_df)
#            iso_ref          gene_ref     isoform_id     gene_id gene_name condition_1 condition_2   IF1
# 1 isoComp_00000297 geneComp_00000088 TCONS_00004087        ENO1      ENO1        hESC         iPS 0.157
# 2 isoComp_00000985 geneComp_00000324 TCONS_00003819 XLOC_001217      <NA>        hESC         iPS 0.084
# 3 isoComp_00000551 geneComp_00000171 TCONS_00000463        NBL1      NBL1        hESC         iPS 0.112
# 4 isoComp_00000549 geneComp_00000171 TCONS_00000460        NBL1      NBL1        hESC         iPS 0.881
# 5 isoComp_00000152 geneComp_00000053 TCONS_00004180       CASP9     CASP9        hESC         iPS 0.474
# 6 isoComp_00000242 geneComp_00000072 TCONS_00000420       CROCC     CROCC        hESC         iPS 0.683
#     IF2    dIF isoform_switch_q_value Rank
# 1 0.000 -0.157           2.429932e-90    1
# 2 0.434  0.350           1.464063e-24    2
# 3 1.000  0.888           1.039215e-21    3
# 4 0.000 -0.881           1.194327e-19    4
# 5 0.000 -0.474           5.232094e-18    5
# 6 0.173 -0.510           1.752050e-17    6

2.4.3 火山图

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
ggplot(data=sar3$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) +
  geom_point(
    aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
    size=1
  ) +
  geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff
  geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff
  scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
  labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') +
  theme_bw()
LwNDe8.png

3、进阶分析

对于不同组样本的某一基因,存在其中一种isoform相对上调,另一种isoform相对下调的情况。可以对isoform的特点进行分析,从而进一步分析isoformswitch的影响与意义。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
sar3
# This switchAnalyzeRlist list contains:
# 312 isoforms from 68 genes
# 1 comparison from 2 conditions (in total 4 samples)
# 
# Switching features:
# Comparison Isoforms Switches Genes
# 1 hESC vs iPS      121       95    68
# 
# Feature analyzed:
# [1] "Isoform Switch Identification, ORFs, ntSequence"
names(sar3)
# [1] "isoformFeatures"       "exons"                 "conditions"            "designMatrix"         
# [5] "sourceId"              "isoformCountMatrix"    "isoformRepExpression"  "runInfo"              
# [9] "orfAnalysis"           "isoformRepIF"          "ntSequence" 

3.1 可变剪切Alternative Splicing

Six alternative splicing categories
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
sar4 <- analyzeAlternativeSplicing(sar3)
sar4$AlternativeSplicingAnalysis %>% head(2)
#       isoform_id ES ES_genomic_start ES_genomic_end MEE MEE_genomic_start MEE_genomic_end MES
# 1 TCONS_00003880  0             <NA>           <NA>   0              <NA>            <NA>   0
# 2 TCONS_00003881  1          1230098        1230196   0              <NA>            <NA>   0
#   MES_genomic_start MES_genomic_end IR IR_genomic_start IR_genomic_end A5 A5_genomic_start A5_genomic_end
# 1              <NA>            <NA>  0             <NA>           <NA>  0             <NA>           <NA>
# 2              <NA>            <NA>  0             <NA>           <NA>  0             <NA>           <NA>
#   A3 A3_genomic_start A3_genomic_end ATSS
# 1  0             <NA>           <NA>    1
# 2  0             <NA>           <NA>    1
#                                                                        ATSS_genomic_start
# 1 1234725;1235211;1235353;1235538;1235889;1237368;1238302;1238542;1239466;1243149;1244822
# 2                                                                         1243149;1244822
#                                                                          ATSS_genomic_end ATTS
# 1 1234736;1235285;1235448;1235582;1236072;1237426;1238355;1238661;1241309;1243269;1244989    0
# 2                                                                         1243269;1244989    0
#   ATTS_genomic_start ATTS_genomic_end
# 1               <NA>             <NA>
# 2               <NA>             <NA>
1
2
extractSplicingSummary(sar4)
# 一个significant isoform如果发生了A3事件,如果其dIF>0,则iPS相对hESC used more;反之used less
LwNMri.png
1
extractSplicingEnrichment(sar4)
LwSGWe.png
1
extractSplicingGenomeWide(sar4)
LwSlW4.png

3.2 isoform Consequences预测

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
sar5 <- extractSequence(sar4,
                        writeToFile=TRUE,
                        pathToOutput = 'data/output')
list.files("data/output/")
# [1] "isoformSwitchAnalyzeR_isoform_AA.fasta"
# [2] "isoformSwitchAnalyzeR_isoform_nt.fasta"

sar5$ntSequence %>% head(3)
#DNAStringSet object of length 6:
#     width seq                                names               
# [1]  2750 GGGTCTCCCTCTGTTG...CCCACGCGGACAGAG TCONS_00000007
# [2]  4369 TTACTGTTGATTGTGA...AAAATATCGCCCACG TCONS_00000008
# [3]  4272 TTACTGTTGATTGTGA...AAAATATCGCCCACG TCONS_00000009

sar5$aaSequence %>% head(3)
#AAStringSet object of length 3:
#     width seq                                names               
# [1]   389 MLLPPGSLSRPRTFSS...QAQLLPHSGPFRPNS TCONS_00000007
# [2]   389 MLLPPGSLSRPRTFSS...QAQLLPHSGPFRPNS TCONS_00000008
# [3]   389 MLLPPGSLSRPRTFSS...QAQLLPHSGPFRPNS TCONS_00000009
  • CPAT : The Coding-Potential Assessment Tool which is a tool for predicting whether an isoform is coding or not. (_nt.fasta)
  • CPC2 : The Coding Potential Calculator 2 which is a tool for predicting whether an isoform is coding or not. (_nt.fasta)
  • Pfam : Prediction of protein domains (_AA.fasta)
  • SignalP : Prediction of Signal Peptides(_AA.fasta)
  • IUPred2A: Predicts Intrinsically Disordered Regions (IDR) and Intrinsically Disordered Binding Regions (IDBR)(_AA.fasta)
  • NetSurfP-2 : Prediction of Intrinsically Disordered Regions (IDR) (_AA.fasta)

上传核苷酸/氨基酸序列到相应网站,下载预测的结果,再导入到switchAnalyzeRlist对象中

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
sar6 <- analyzeCPAT(
  switchAnalyzeRlist   = sar5,
  pathToCPATresultFile = "data/external/cpat_results.txt",
  codingCutoff         = 0.725,
  removeNoncodinORFs   = TRUE
)
sar6 <- analyzePFAM(
  switchAnalyzeRlist   = sar6,
  pathToPFAMresultFile = "data/external/pfam_results.txt"
)
sar6 <- analyzeSignalP(
  switchAnalyzeRlist       = sar6,
  pathToSignalPresultFile  = "data/external/signalP_results.txt"
)

sar6 <- analyzeIUPred2A(
  switchAnalyzeRlist        = sar6,
  pathToIUPred2AresultFile = "data/external/iupred2a_result.txt.gz"
)


sar7 <- analyzeSwitchConsequences(sar6)
length(sar7)
# [1] 18
names(sar7)
# [1] "isoformFeatures"             "exons"                      
# [3] "conditions"                  "designMatrix"               
# [5] "sourceId"                    "isoformCountMatrix"         
# [7] "isoformRepExpression"        "runInfo"                    
# [9] "orfAnalysis"                 "isoformRepIF"               
# [11] "ntSequence"                  "isoformSwitchAnalysis"      
# [13] "AlternativeSplicingAnalysis" "aaSequence"                 
# [15] "domainAnalysis"              "signalPeptideAnalysis"      
# [17] "idrAnalysis"        "switchConsequence"

sar7$switchConsequence %>% 
  dplyr::filter(isoformsDifferent=="TRUE") %>% head()
#            gene_ref gene_id gene_name condition_1 condition_2 isoformUpregulated
# 1 geneComp_00000003   ACAP3     ACAP3        hESC         iPS     TCONS_00003882
# 2 geneComp_00000004   ACOT7     ACOT7        hESC         iPS     TCONS_00004035
# 3 geneComp_00000004   ACOT7     ACOT7        hESC         iPS     TCONS_00004035
# 4 geneComp_00000004   ACOT7     ACOT7        hESC         iPS     TCONS_00004035
# 5 geneComp_00000004   ACOT7     ACOT7        hESC         iPS     TCONS_00004035
# 6 geneComp_00000004   ACOT7     ACOT7        hESC         iPS     TCONS_00004036
#   isoformDownregulated       iso_ref_up     iso_ref_down    featureCompared
# 1       TCONS_00003880 isoComp_00000006 isoComp_00000004 ORF_seq_similarity
# 2       TCONS_00004039 isoComp_00000010 isoComp_00000014 ORF_seq_similarity
# 3       TCONS_00004039 isoComp_00000010 isoComp_00000014         NMD_status
# 4       TCONS_00004039 isoComp_00000010 isoComp_00000014 domains_identified
# 5       TCONS_00004039 isoComp_00000010 isoComp_00000014     IDR_identified
# 6       TCONS_00004039 isoComp_00000011 isoComp_00000014         NMD_status
#   isoformsDifferent switchConsequence
# 1              TRUE     ORF is longer
# 2              TRUE     ORF is longer
# 3              TRUE   NMD insensitive
# 4              TRUE       Domain gain
# 5              TRUE          IDR gain
# 6              TRUE   NMD insensitive

extractConsequenceSummary(sar7)

LwS1sh.png

1
extractConsequenceEnrichment(sar7)
LwSHhS.png
1
2
3
switchPlot(sar7, gene = 'ACAP3',
           condition1="hESC",
           condition2="iPS")
LwStnW.png