-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSConstruct
executable file
·783 lines (669 loc) · 24.8 KB
/
SConstruct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import os.path
import subprocess
## Construction environment used by the rest of this file.
env = Environment()
## Add the lib-perl5 directory to PERL5LIB, then load the perl5 tool.
env.Append(PERL5LIB=[env.Dir('lib-perl5')])
env.Tool('perl5')
## Add a NoShellCommand builder to be used like Command()
##
## This has the advantage that there's no shell involved, saving us
## from having to escape quotes, spaces, wildcards, and whatnot.
##
## See the whole mailing list thread at
## https://pairlist4.pair.net/pipermail/scons-users/2015-October/004150.html
## with the solution found much later with
## https://pairlist4.pair.net/pipermail/scons-users/2015-November/004193.html
def no_shell_command(target, source, env):
    """Run the command stored in ``env['action']`` without a shell.

    Each element of ``env['action']`` is converted with ``str`` (some of
    them may be File or Dir nodes) and the resulting argument list is
    executed directly.  Returns the exit status of the command.
    """
    argv = list(map(str, env['action']))
    return subprocess.call(argv)
def no_shell_command_strfunc(target, source, env):
    """Return the text displayed for a NoShellCommand action.

    The first element of ``env['action']`` is shown bare after a ``$``
    prompt, every following element is wrapped in single quotes.
    """
    argv = env['action']
    program, arguments = argv[0], argv[1:]
    quoted = ["'%s'" % argument for argument in arguments]
    return "$ %s " % program + " ".join(quoted)
## Wrap no_shell_command in an Action, with no_shell_command_strfunc as
## its strfunction, and register it as the NoShellCommand builder.
no_shell_command_action = Action(no_shell_command, strfunction=no_shell_command_strfunc)
env.Append(BUILDERS={'NoShellCommand' : Builder(action=no_shell_command_action)})
## FIXME very temporary while we move all of MyLib to our modules
env.Append(PERL5LIB=[env.Dir('scripts')])
## Help text listing the targets and command line options.  The target
## names listed here must match the names given to env.Alias() further
## down in this file (the csv target is registered as "csv").
env.Help("""
TARGETS
catalogue (default)
Build pdf for catalogue --- the pdf with tables of all genes
and their products, counts of genes per cluster, sequence
alignment, and list of anomalies.
data
Get raw data required for the analysis from the Entrez system
This will not download new data if there is only data already
downloaded. See target 'update' for that.
update
Remove previously downloaded raw data and download fresh one.
It will not rebuild a catalogue or manuscript unless those targets
are also specified.
csv
Prepare a csv file with the histone gene information downloaded
from the Entrez gene database.
analysis
Run all the scripts to analyse the data such as: sequence alignments,
search for anomalies on the sequence annotations, LaTeX tables listing
all genes and proteins, sequence differences between isoforms. The
analysis results are required for the publication.
manuscript
Build manuscript.pdf.
OPTIONS
--api-key=API-KEY
Set NCBI E-utilities API key for requests. Without an API key,
NCBI will limit the number of requests and the getting data step
may fail.
scons --api-key="36-characters-hexadecimal-string"
--email=ADDRESS
Set email to be used when connecting to the NCBI servers.:
scons --email="Your Name <your.name@domain.here>"
scons --email="<your.name@domain.here>"
--organism=NAME
Organism species name to use when searching RefSeq for histone
sequences. Defaults to Homo Sapiens.
--verbose
LaTeX and BibTeX compilers are silenced by default using the
batchmode and terse options. Set this option to revert it.
""")
## Command line options.  Each entry is the option flag followed by the
## keyword arguments handed to AddOption(); they are registered in the
## order listed.
for option_flag, option_settings in (
    ("--email", dict(
        dest="email",
        action="store",
        type="string",
        default="",
        help="E-mail provided to NCBI when connecting to Entrez.",
    )),
    ("--api-key", dict(
        dest="api-key",
        action="store",
        type="string",
        default="",
        help="NCBI E-utilities API key.",
    )),
    ("--verbose", dict(
        dest="verbose",
        action="store_true",
        default=False,
        help="Print LaTeX and BibTeX output.",
    )),
    ("--organism", dict(
        dest="organism",
        action="store",
        type="string",
        default="Homo sapiens",
        help="Organism to search for histones.",
    )),
):
    AddOption(option_flag, **option_settings)
## Unless --verbose was given, silence the TeX family of compilers and
## BibTeX.
if not env.GetOption("verbose"):
    for tex_flags in ("PDFLATEXFLAGS", "PDFTEXFLAGS", "TEXFLAGS", "LATEXFLAGS"):
        env.AppendUnique(**{tex_flags: "-interaction=batchmode"})
    ## some ports of BibTeX may use --quiet instead
    env.AppendUnique(BIBTEXFLAGS="--terse")
## Build configuration (check if dependencies are all installed)
##
## The really really really right way to do the checks would be to set up a
## scanner that finds the required LaTeX packages and perl modules. But that's
## something that should be done upstream in SCons (the scan for LaTeX source
## is already being worked on so this may not be necessary in the future)
def _kpsewhich_finds(filename):
    """Return True when ``kpsewhich filename`` exits with status 0.

    The standard output of kpsewhich is discarded.  If kpsewhich cannot be
    started at all (OSError, e.g. it is not installed) the file counts as
    not found instead of aborting the configuration with a traceback.
    """
    ## The context manager guarantees the handle on os.devnull is closed.
    with open(os.devnull, "wb") as devnull:
        try:
            return 0 == subprocess.call(["kpsewhich", filename],
                                        stdout=devnull)
        except OSError:
            return False


def CheckLaTeXPackage(context, package):
    """Configure test: can kpsewhich find the LaTeX package ``package``.sty?

    Reports the outcome through ``context`` and returns it as a bool.
    """
    context.Message("Checking for LaTeX package %s..." % package)
    is_ok = _kpsewhich_finds(package + ".sty")
    context.Result(is_ok)
    return is_ok


def CheckLaTeXClass(context, doc_class):
    """Configure test: can kpsewhich find the document class ``doc_class``.cls?

    Reports the outcome through ``context`` and returns it as a bool.
    """
    context.Message("Checking for LaTeX document class %s..." % doc_class)
    is_ok = _kpsewhich_finds(doc_class + ".cls")
    context.Result(is_ok)
    return is_ok


def CheckTeXDef(context, tex_def):
    """Configure test: can kpsewhich find the definition file ``tex_def``.def?

    Reports the outcome through ``context`` and returns it as a bool.
    """
    context.Message("Checking for TeX definition file %s..." % tex_def)
    is_ok = _kpsewhich_finds(tex_def + ".def")
    context.Result(is_ok)
    return is_ok


def CheckBibTeXStyle(context, style):
    """Configure test: can kpsewhich find the BibTeX style ``style``.bst?

    Reports the outcome through ``context`` and returns it as a bool.
    """
    context.Message("Checking for BibTeX style %s..." % style)
    is_ok = _kpsewhich_finds(style + ".bst")
    context.Result(is_ok)
    return is_ok
def CheckEmail(context, email):
    """Configure test: was an e-mail address given?

    The address itself is handed to ``context.Result`` and returned, so
    the test is false only for an empty value.
    """
    context.Message("Checking e-mail address...")
    ## Don't check email validity, the program that uses it will do it
    context.Result(email)
    return email
def CheckAPIKey(context, api_key):
    """Configure test: was an NCBI API key given?

    Only whether a key is present is reported through ``context``.  The
    key is a secret, so it is never passed to ``context.Result``, which
    would display it.  Returns True when ``api_key`` is non-empty, False
    otherwise.
    """
    context.Message("Checking api-key...")
    is_ok = bool(api_key)
    context.Result(is_ok)
    return is_ok
def CheckProg(context, app_name):
    """Configure test: does ``context.env.WhereIs`` find ``app_name``?

    Whatever ``WhereIs`` returns is reported through ``context`` and
    returned to the caller.
    """
    context.Message("Checking for %s..." % app_name)
    location = context.env.WhereIs(app_name)
    context.Result(location)
    return location
def CheckCommand(context, command, message):
    """Configure test: run ``command`` through ``context.TryAction``.

    ``message`` completes the "Checking ..." line.  The first element of
    the value returned by ``TryAction`` is reported and returned.
    """
    context.Message("Checking %s..." % message)
    outcome = context.TryAction(command)
    succeeded = outcome[0]
    context.Result(succeeded)
    return succeeded
def CheckPerlModule(context, module_name):
    """Configure test: can perl load the module ``module_name``?

    Runs ``perl -M<module_name> -e 1`` with its standard output discarded
    and reports whether it exited with status 0.  If perl itself cannot be
    started (OSError) the module is reported as missing instead of
    aborting with a traceback.  Returns the outcome as a bool.
    """
    context.Message("Checking for perl module %s..." % module_name)
    ## The context manager guarantees the handle on os.devnull is closed.
    with open(os.devnull, "wb") as devnull:
        try:
            is_ok = 0 == subprocess.call(["perl", "-M" + module_name, "-e 1"],
                                         stdout=devnull)
        except OSError:
            is_ok = False
    context.Result(is_ok)
    return is_ok
def CheckVariable(context, variable_name):
    """Configure test: is ``variable_name`` set in ``context.env``?

    Reports the outcome through ``context`` and returns it as a bool.
    """
    context.Message("Checking for variable %s..." % variable_name)
    ## env is not really a dict so 'in env' can't be used; look the
    ## variable up and see whether that raises KeyError.
    try:
        context.env[variable_name]
    except KeyError:
        is_defined = False
    else:
        is_defined = True
    context.Result(is_defined)
    return is_defined
## Many bioperl-run modules are mainly wrappers around an executable.
## Checking for the presence of the module is not enough, we need to check
## if they are working (well, we just check if bioperl finds the programs).
def CheckBioperlRunExecutable(context, module):
    """Configure test: does the bioperl-run ``module`` find its executable?

    Builds a perl one-liner that loads ``module`` and calls
    ``new()->executable()`` on it, runs it through ``context.TryAction``,
    and reports and returns the first element of the result.
    """
    context.Message("Checking for working %s..." % module)
    perl_one_liner = "perl -M%s -e '%s->new()->executable()'" % (module, module)
    outcome = context.TryAction(perl_one_liner)
    works = outcome[0]
    context.Result(works)
    return works
## Configure context with all the custom tests defined above, each one
## registered under its own function name.
custom_checks = (
    CheckLaTeXClass,
    CheckLaTeXPackage,
    CheckTeXDef,
    CheckBibTeXStyle,
    CheckPerlModule,
    CheckEmail,
    CheckAPIKey,
    CheckProg,
    CheckCommand,
    CheckVariable,
    CheckBioperlRunExecutable,
)
conf = Configure(
    env,
    custom_tests=dict((check.__name__, check) for check in custom_checks),
)
## Perl modules required by the build itself:
## this is needed by the scons perl tool
perl_dependencies = [
"Module::ScanDeps",
]
## Perl modules used by our own modules and scripts.  The list was made with
## grep -rh '^use ' lib-perl5/ scripts/| sort | uniq
## and then remove the core modules and pragmas
perl_analysis_dependencies = [
"Bio::AlignIO",
"Bio::Align::Utilities",
"Bio::CodonUsage::Table",
"Bio::DB::EUtilities",
"Bio::LocatableSeq",
"Bio::Root::Version",
"Bio::Seq",
"Bio::SeqIO",
"Bio::SeqUtils",
"Bio::SimpleAlign",
"Bio::Tools::CodonTable",
"Bio::Tools::EUtilities",
"Bio::Tools::SeqStats",
"File::Which",
"Moose",
"Moose::Util::TypeConstraints",
"MooseX::StrictConstructor",
"namespace::autoclean",
"Statistics::Basic",
"Text::CSV",
]
## Perl modules used by the test suite.  The list was made with
## grep -rh '^use ' t/ | sort | uniq
## and then remove the core modules and pragmas
perl_test_dependencies = [
"Bio::LocatableSeq",
"Bio::Seq",
"Bio::SimpleAlign",
"Test::Exception",
"Test::Output",
]
## This is a dict where the key is a perl module and the value is the
## program (or suite of programs) that is most likely missing when that
## module does not work.
bioperl_run_dependencies = {
"Bio::Tools::Run::Alignment::Clustalw" : "clustalw",
"Bio::Tools::Run::Alignment::TCoffee" : "t-coffee",
"Bio::Tools::Run::Phylo::PAML::Codeml" : "PAML",
}
## LaTeX packages (names without the .sty extension) that are listed in the
## help text and checked for during configuration.
latex_package_dependencies = [
"fontenc",
"inputenc",
"graphicx",
"url",
"todonotes",
"natbib",
"color",
"kpfonts",
"seqsplit",
"eqparbox",
"capt-of",
"hyperref",
"fp",
"afterpage",
"isodate",
"etoolbox",
"stringstrings",
"intcalc",
"siunitx",
"textgreek",
"xtab",
]
## Second part of the help text: the list of dependencies, generated
## from the lists above so that they stay in sync with the checks.
def _help_bullets(names):
    """Add one bullet line to the help text for each of ``names``."""
    for name in names:
        env.Help(" * %s\n" % name)


env.Help("""
DEPENDENCIES
Programs:
* bp_genbank_ref_extractor - Distributed with the perl module
Bio-EUtilities version 1.74 or later.
* weblogo - Available at http://weblogo.threeplusone.com/
Perl modules:
""")
_help_bullets(perl_dependencies + perl_analysis_dependencies
              + list(bioperl_run_dependencies.keys()))

env.Help("""
Perl modules for test suite:
""")
_help_bullets(perl_test_dependencies)

env.Help("""
LaTeX document class
* memoir
LaTeX packages
""")
_help_bullets(latex_package_dependencies)

env.Help("""
BibTeX style
* agu
""")
## Run the configure checks: required programs, perl modules, LaTeX
## packages/class/style, and the NCBI e-mail and API key options.  A
## failed requirement prints a message and exits with status 1; a missing
## e-mail or API key only prints a warning.
##
## Seriously, this should be the default. Otherwise, users won't even
## get to see the help text unless they pass the configure tests.
## And Configure(..., clean=False,help=False) does not really work,
## it just makes all configure tests fail.
if not (env.GetOption('help') or env.GetOption('clean')):
for prog in ["bp_genbank_ref_extractor", "weblogo"]:
if not conf.CheckProg(prog):
print ("Unable to find `%s' installed" % prog)
Exit(1)
## We need this option in weblogo to remove the numbering from the X axis.
## See issue #33. This option was added to weblogo version 3.5.0.
if not conf.CheckCommand("printf '>1\\nAC\\n>2\\nAC\\n'"
+ " | weblogo --number-interval 50"
+ " > %s" % os.devnull,
"if weblogo supports --number-interval"):
print ("weblogo has no --number-interval option (added in weblogo 3.5.0)")
Exit(1)
## set() so that a module present in more than one list is checked once.
for module in set (perl_dependencies + perl_analysis_dependencies
+ list(bioperl_run_dependencies.keys())):
if not conf.CheckPerlModule(module):
print ("Unable to find perl module %s." % module)
Exit(1)
for module, program in bioperl_run_dependencies.items():
if not conf.CheckBioperlRunExecutable(module):
print ("bioperl's %s is not working (did you install %s?)" % (module, program))
Exit(1)
## The test suite modules are only required when running the tests.
if "check" in COMMAND_LINE_TARGETS:
for module in set (perl_test_dependencies):
if not conf.CheckPerlModule(module):
print ("Unable to find perl module %s." % module)
Exit(1)
if not conf.CheckVariable('EPSTOPDF'):
print ("SCons EPSTOPDF not configured. Do you have epstopdf installed (part of texlive)")
Exit(1)
## We can get SCons in a state where EPSTOPDF is defined but the
## program where it points to does not really exist. See issue #48
if not conf.CheckProg(env['EPSTOPDF']):
print ("Unable to find `epstopdf' (part of texlive) installed")
Exit(1)
## We need this so we can then use CheckLatex* or the user gets a
## pretty cryptic error message.
if not conf.CheckProg("kpsewhich"):
print ("Unable to find `kpsewhich' (part of texlive) installed")
Exit(1)
for package in latex_package_dependencies:
if not conf.CheckLaTeXPackage(package):
print ("Unable to find required LaTeX package %s." % package)
Exit(1)
if not conf.CheckLaTeXClass("memoir"):
print ("Unable to find the LaTeX document class memoir.")
Exit(1)
if not conf.CheckBibTeXStyle("agu"):
print ("Unable to find the BibTeX style agu.")
Exit(1)
## We shouldn't need this. If textgreek is properly installed, this
## is not a problem but that doesn't happen in Debian Jessie and
## many Ubuntu versions. And instead of checking for lgrenc we
## should be testing for a working textgreek package but I don't
## want to setup such configure check with SCons.
if not conf.CheckTeXDef("lgrenc"):
print ("Didn't found lgrenc.def so textgreek is broken.")
Exit(1)
## If the user does not want to set an email, let them. We warn
## here and Bio-EUtilities warns again, but don't force it.
if not conf.CheckEmail(env.GetOption("email")):
print ("WARNING: Per NCBI policy, an email is required when using EUtilities\n"
" to retrieve data from the Entrez system. Consider using\n"
" '--email' and see README for details why.")
## Same for the API key: if the user does not want to set one, we
## only warn and carry on.
if not conf.CheckAPIKey(env.GetOption("api-key")):
print ("WARNING: a NCBI E-utilities API Key is required to make the number\n"
" of queries that retrieving the data for this project.\n"
" Considering a NCBI account, then create an API key, and\n"
" pass it via the '--api-key' option.")
## Close the configure context and keep using the environment it returns.
env = conf.Finish()
##
## Actual TARGETS from this point on
##
## Directory nodes used to build the paths of sources and targets.
scripts_dir = env.Dir ("scripts")
results_dir = env.Dir ("results")
figures_dir = env.Dir ("figs")
seq_dir = env.Dir (os.path.join (str (results_dir), "sequences"))
## The reference directory name depends on the --organism option: spaces
## become dashes and the name is lowercased.
reference_dirname = "reference-" + env.GetOption("organism").replace(" ", "-").lower()
reference_dir = env.Dir (os.path.join ("data", reference_dirname))
def path4lib(name):
    """Return the path of ``name`` inside the lib-perl5 directory."""
    lib_root = "lib-perl5"
    return os.path.join(lib_root, name)
def path4script(name):
    """Return the path of ``name`` inside the scripts directory."""
    scripts_root = str(scripts_dir)
    return os.path.join(scripts_root, name)
def path4result(name):
    """Return the path of ``name`` inside the results directory."""
    results_root = str(results_dir)
    return os.path.join(results_root, name)
def path4figure(name):
    """Return the path of ``name`` inside the figures directory."""
    figures_root = str(figures_dir)
    return os.path.join(figures_root, name)
def path4seq(name):
    """Return the path of ``name`` inside the sequences directory."""
    sequences_root = str(seq_dir)
    return os.path.join(sequences_root, name)
## TARGET data
##
## SCons does not like it when the target is a directory and will always
## consider it up to date, even if the source changes. Because of this,
## we set the data.csv and extractor.log files as targets.
def create_extract_sequences_args():
    """Return the bp_genbank_ref_extractor command as an argument list.

    The list starts with the program name and its fixed options, then
    ``--email`` and ``--api-key`` when those command line options were
    given, and ends with the Entrez query built from the ``--organism``
    option and the histone gene names.
    """
    ## Gene names to use on the entrez query. Note that:
    ##   "Right side truncation with wild card does work for gene symbol"
    ##      ~ NCBI helpdesk via email (September 2011)
    gene_names = [
        "H1*[gene name]",
        "H2A*[gene name]",
        "H2B*[gene name]",
        "H3*[gene name]",
        "H4*[gene name]",
        "HIST1*[gene name]",
        "HIST2*[gene name]",
        "HIST3*[gene name]",
        "HIST4*[gene name]",
        "CENPA[gene name]"
    ]
    any_gene = " OR ".join(gene_names)
    entrez_query = '"%s"[organism] AND (%s)' % (env.GetOption("organism"),
                                                any_gene)

    extractor_call = [
        "bp_genbank_ref_extractor",
        "--assembly", "Reference GRC",
        "--genes", "uid",
        "--pseudo",
        "--non-coding",
        "--upstream", "500",
        "--downstream", "500",
        "--transcripts", "accession",
        "--proteins", "accession",
        "--limit", "300",
        "--format", "genbank",
        "--save", seq_dir,
        "--save-data", "csv",
    ]

    ## These are only passed on when the user set them.
    for option_name in ("email", "api-key"):
        option_value = env.GetOption(option_name)
        if option_value:
            extractor_call.extend(["--" + option_name, option_value])

    extractor_call.append(entrez_query)
    return extractor_call
## Ideally we would set target to the sequences directory and it would be
## automatically removed when it gets rebuild. However, a Dir as targets
## means it's always out of date so it would force a rebuild every time.
## That is why we set the csv and log files as targets.
## Run bp_genbank_ref_extractor (no shell involved) to produce data.csv
## and extractor.log in the sequences directory.
raw_data = env.NoShellCommand(
source = None,
target = [path4seq("data.csv"), path4seq("extractor.log")],
action = create_extract_sequences_args())
## AddPreAction() is required so that the directory is removed when rebuilding.
## Clean() is required so that it's removed when calling "scons -c".
env.AddPreAction(raw_data, Delete (seq_dir))
env.Clean(raw_data, seq_dir)
## histones_db.store is written by HistoneSequencesDB from the contents
## of the sequences directory, so it depends on the raw data.
db_store = File (path4result("histones_db.store"))
data_store = env.PerlSub(
target = db_store,
source = path4lib("HistoneSequencesDB.pm"),
action = ('HistoneSequencesDB->new("%s")->write_db("%s")'
%(seq_dir.path, db_store.path))
)
env.Depends(data_store, raw_data)
## old Storable files which we are replacing by HistoneSequencesDB
## extract_sequences.pl is given the sequences directory and is declared
## to produce the three .store files listed as targets.
seq_store = env.PerlScript(
target = [path4seq("canonical.store"), path4seq("variant.store"),
path4seq("h1.store")],
source = path4script("extract_sequences.pl"),
action = [seq_dir]
)
## The "data" target groups the raw data and both kinds of store files.
env.Alias("data", [raw_data, data_store, seq_store])
## TARGET csv
##
## This is completely useless and is not required by any other target.
## It is even dangerous because csv is a really poor format for genes.
## We only have this because Andrew wants it for his other projects.
## See https://github.com/af-lab/histone-catalogue/issues/3
def path4csv(name=""):
    """Return the path of ``name`` inside the csv results directory.

    Without ``name`` the path of the csv directory itself (with a trailing
    separator) is returned.
    """
    csv_root = os.path.join(str(results_dir), "csv")
    return os.path.join(csv_root, name)
## create_histone_csv.pl is given the histone database file and the csv
## directory, and is declared to produce the three csv files listed.
csv_data = env.PerlScript(
target = [path4csv ("canonical_core_histones.csv"),
path4csv ("variant_core_histones.csv"),
path4csv ("linker_histones.csv")],
source = path4script ("create_histone_csv.pl"),
action = [db_store, Dir (path4csv()).path]
)
env.Depends(csv_data, [data_store])
## The target is requested on the command line as "csv".
env.Alias("csv", csv_data)
## TARGET update
##
## Remove the previously downloaded data forcing a rebuild.
## Only when "update" was requested on the command line do we mark the raw
## data as always out of date, so that it is downloaded again.
if "update" in COMMAND_LINE_TARGETS:
env.AlwaysBuild(raw_data)
env.Alias("update", raw_data)
## TARGET analysis
##
## For analysis, each script is its own target. We then set an alias that
## groups all of them. Each of these scripts generate a large number of
## files, the targets, we need to make lists of them all
## Perl expression that reads the histone database; it is interpolated in
## the perl code of several of the targets below.
perl_db_var = "HistoneSequencesDB::read_db('%s')" % db_store.path
## NOTE(review): clust_targets is never filled or used in the part of the
## file visible here --- confirm whether it can be removed.
clust_targets = list ()
refer_targets = list ()
utr_targets = list ()
refer_targets += [path4result ("table-reference_comparison.tex")]
utr_targets += [
path4result ("variables-utr.tex"),
path4result ("aligned_stem_loops.fasta"),
path4figure ("seqlogo_stem_loops.eps"),
path4result ("aligned_HDEs.fasta"),
path4figure ("seqlogo_HDEs.eps"),
]
## One entry per analysis output.  More entries are added further down
## (reference comparison, alignments, and sequence logos).
analysis = [
## Targets generated from functions in the HistoneCatalogue module.
env.PerlOutput(
target = path4result("table-histone_catalogue.tex"),
source = path4lib("HistoneCatalogue.pm"),
M = ["HistoneCatalogue", "HistoneSequencesDB"],
eval = ("HistoneCatalogue::say_histone_catalogue(%s->canonical_core)"
% perl_db_var),
),
env.PerlOutput(
target = path4result("table-variant_catalogue.tex"),
source = path4lib("HistoneCatalogue.pm"),
M = ["HistoneCatalogue", "HistoneSequencesDB"],
eval = ("HistoneCatalogue::say_histone_catalogue(%s->variants_core)"
% perl_db_var),
),
env.PerlOutput(
target = path4result("variables-histone_counts.tex"),
source = path4lib("HistoneCatalogue.pm"),
M = ["HistoneCatalogue", "HistoneSequencesDB"],
eval = ("HistoneCatalogue::say_histone_counts(%s)" % perl_db_var),
),
## Targets generated by scripts that take the database file as argument.
env.PerlOutput(
target = path4result("variables-cluster_stats.tex"),
source = path4script("cluster_stats.pl"),
args = [db_store],
),
env.PerlOutput(
target = path4result("variables-protein_stats.tex"),
source = path4script("protein_stats.pl"),
args = [db_store],
),
env.PerlOutput(
target = path4result("histone_insanities.tex"),
source = path4script("histone_sanity_checks.pl"),
args = [db_store],
),
## utr_analysis.pl is declared to produce all the files in utr_targets.
env.PerlScript(
target = utr_targets,
source = path4script ("utr_analysis.pl"),
action = ["--sequences", seq_dir, "--figures", figures_dir,
"--results", results_dir],
),
## The configuration variables are generated from the extractor log.
env.PerlOutput(
target = path4result("variables-configuration.tex"),
source = path4lib("HistoneCatalogue.pm"),
M = ["HistoneCatalogue"],
eval = ("HistoneCatalogue::write_config_variables('%s')"
% File (path4seq("extractor.log")).path)
),
env.PerlOutput(
target = path4result("table-codon_usage.tex"),
source = path4script("codon_usage.pl"),
args = [db_store],
),
]
## The comparison against the reference is only set up when the reference
## directory for the selected organism exists.
if not os.path.isdir(str(reference_dir)):
    ## This is only needed for manuscript.pdf anyway.  If we ever get to
    ## support a manuscript for multiple organisms, it may be possible
    ## that they will have no reference, so we will have to figure out a
    ## way to handle this better then.
    print ("WARNING: no reference data found for %s.\n"
           " Skipping comparison against reference."
           % env.GetOption("organism"))
else:
    reference_comparison = env.PerlScript(
        target = refer_targets,
        source = path4script("reference_comparison.pl"),
        action = ["--sequences", seq_dir, "--results", results_dir,
                  "--reference", reference_dir],
    )
    analysis.append(reference_comparison)
## The whole mess of alignment targets and their dependencies:
##
## protein aligns ----> table describing isoforms
## | |--> protein sequence logo
## | |--> protein align stats -- >alignment percentage identity
## \_/
## transcript aligns ----> cds sequence logo
## |--> transcript align stats --> dn/ds
## Alignments of all histone types, collected for the stats targets that
## follow this loop.
cds_aligns = []
protein_aligns = []
for histone in ["H2A", "H2B", "H3", "H4"]:
## Protein alignment: made from the database for this histone type.
protein_align_f = File (path4result("aligned_%s_proteins.fasta" % histone))
protein_align = env.PerlScript(
target = protein_align_f,
source = path4script("align_proteins.pl"),
action = [db_store, histone, protein_align_f],
)
env.Depends(protein_align, [data_store])
protein_aligns += protein_align
## CDS alignment: the script is given the protein alignment so it depends
## on it as well as on the database.
cds_align_f = File (path4result("aligned_%s_cds.fasta" % histone))
cds_align = env.PerlScript(
target = cds_align_f,
source = path4script("align_transcripts.pl"),
action = [db_store] + protein_align + [cds_align_f],
)
env.Depends(cds_align, [data_store] + protein_align)
cds_aligns += cds_align
## Table describing the isoforms, made from the protein alignment.
isoforms_desc = env.PerlOutput(
target = path4result("table-%s-proteins-align.tex" % histone),
source = path4script("describe_isoforms.pl"),
args = [db_store] + protein_align,
)
env.Depends(isoforms_desc, protein_align)
analysis += [isoforms_desc]
## One sequence logo for each of the two alignments.
protein_logo_f = File (path4figure("seqlogo_%s_proteins.eps" % histone))
cds_logo_f = File (path4figure("seqlogo_%s_cds.eps" % histone))
for aln, logo_f in zip ([protein_align, cds_align], [protein_logo_f, cds_logo_f]):
logo = env.PerlScript(
target = logo_f,
source = path4script("mk_histone_seqlogo.pl"),
action = aln + [logo_f],
)
Depends(logo, [aln])
analysis += [logo]
## Statistics computed over the CDS alignments of all histone types.
cds_align_stats = env.PerlOutput(
target = path4result("variables-align_transcripts_stats.tex"),
source = path4script("align_transcripts_stats.pl"),
args = cds_aligns,
)
Depends(cds_align_stats, cds_aligns)
analysis += [cds_align_stats]
## Statistics computed over the protein alignments of all histone types.
protein_align_stats = env.PerlOutput(
target = path4result("variables-align_proteins_stats.tex"),
source = path4script("align_proteins_stats.pl"),
args = protein_aligns,
)
Depends(protein_align_stats, protein_aligns)
analysis += [protein_align_stats]
## The "analysis" target groups everything collected in the analysis list.
env.Alias ("analysis", analysis)
## Every analysis target depends on the downloaded data and on MyLib.pm.
env.Depends (
analysis,
[data_store, seq_store, File (path4script ("MyLib.pm"))]
)
## Our figures, converted to pdf as required for pdflatex
figures = env.PDF(source = Glob(os.path.join(str(figures_dir), "*.eps")))
## TARGET catalogue
##
## A simpler LaTeX document with the most important tables and figures.
## This works for all organisms, since the actual manuscript is only
## available for the select organisms that we bothered to write.
## catalogue.pdf is built from catalogue.tex and needs the figures and the
## analysis results.  It is the default target.
catalogue = env.PDF(
target = "catalogue.pdf",
source = "catalogue.tex"
)
env.Alias("catalogue", catalogue)
Depends(catalogue, [figures, analysis])
env.Default(catalogue)
## TARGET manuscript
## manuscript.pdf is built from manuscript.tex and needs the figures and
## the analysis results.  It is not a default target.
manuscript = env.PDF (
target = "manuscript.pdf",
source = "manuscript.tex"
)
env.Alias ("manuscript", manuscript)
Depends (manuscript, [figures, analysis])
## Because the manuscript is not built by default, then it's also not
## removed by default by doing `scons -c`. So we add it to the default
## targets when cleaning
## See http://dcreager.net/2010/01/08/default-scons-clean-targets/
if env.GetOption("clean"):
env.Default(manuscript)
## TARGET check
##
## Only runs if specified from command line.
## The test suite is only set up when "check" was requested on the
## command line: one PerlScript target without output file per t/*.t file.
if "check" in COMMAND_LINE_TARGETS:
    test_suite = [
        env.PerlScript(source=test_script, target=None, action=[])
        for test_script in env.Glob("t/*.t")
    ]
    check = Alias("check", [test_suite])