#!/usr/bin/perl -w
# This script reads sequence files with filename
# extensions matching the one provided
# with the command line option -ext
# For each file, the script calls the EMBOSS
# newcpgreport program on both the sequence
# and a mutated version and reports the
# CpG Islands found
# Author: MyName, 16 Aug 2003
# Version 1.01

use strict;
use warnings;
use Getopt::Long;
use Bio::Factory::EMBOSS;

# EMBOSS application factory. Package-scoped because check_cpg() reads it;
# it is assigned in the main code below, before check_cpg() is first called.
our $emboss_fac;

# check_cpg($file)
#
# Runs the EMBOSS newcpgreport program on the sequence file $file, writing
# the report to "$file.cpg", then prints every report line that contains
# the text 'island', prefixed with "$file : ".
#
# Parameters:
#   $file - path of the sequence file to analyse
# Returns:
#   nothing (bare return)
# Dies:
#   if newcpgreport cannot be obtained from the factory, or if the report
#   file cannot be opened for reading.
sub check_cpg {
    my ($file) = @_;
    my $report = "$file.cpg";

    # Fail with a clear message instead of "Can't call method run on an
    # undefined value" when the application object is not returned.
    my $cpg = $emboss_fac->program('newcpgreport')
        or die "EMBOSS program newcpgreport is not available\n";

    $cpg->run({
        '-sequence' => "$file",
        '-window'   => '100',
        '-shift'    => '1',
        '-minlen'   => '200',
        '-minoe'    => '0.6',
        '-minpc'    => '50.0',
        '-outfile'  => $report,
    });

    # Read the newcpgreport output line by line; three-argument open with a
    # lexical handle keeps the filename from being interpreted as a mode.
    open(my $report_fh, '<', $report)
        or die "Could not open file $report: $!\n";
    while (my $line = <$report_fh>) {
        print "$file : $line" if $line =~ m/island/;
    }
    close($report_fh);

    return;
}    # End of subroutine check_cpg

# Main script code below:

# Initialize the EMBOSS factory
$emboss_fac = Bio::Factory::EMBOSS->new('-verbose' => '1');

# Get the command line option
my $file_ext;
GetOptions("ext=s" => \$file_ext);

# Check to see if -ext was specified
if (!defined $file_ext) {
    die "Usage $0 -ext file_extension\n";
}

# Get the matching filenames
my @files = glob("*.$file_ext");

# If no matching files, print message and exit
if (@files < 1) {
    die "No files with extension $file_ext\n";
}

foreach my $f (@files) {
    # Run a CpG report on the original sequence
    check_cpg("$f");

    # Mutate the sequence file; the mutated sequence is written to "$f.mut"
    my $mut = $emboss_fac->program('msbar')
        or die "EMBOSS program msbar is not available\n";
    $mut->run({
        '-sequence' => "$f",
        '-count'    => '100',
        '-point'    => '1',
        '-block'    => '1',
        '-codon'    => '1',
        '-outseq'   => "$f.mut",
    });

    # Run a CpG report on the mutated sequence
    check_cpg("$f.mut");
}    # End of foreach @files