[go: nahoru, domu]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ReadsHtsgetData source, refactor HtsgetReader #6662

Draft
wants to merge 26 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a476ce7
Implement ReadsHtsgetData source, refactor HtsgetReader
andersleung Jun 16, 2020
ff50eec
Update HtsgetReader command line tests
andersleung Jun 16, 2020
aa4d48a
Commit missing files
andersleung Jun 16, 2020
ba2dd80
Add javadoc, further refactoring to allow easier testing, lazily stre…
andersleung Jun 17, 2020
0cb9530
Use MergingSamRecordIterator internally to ensure proper ordering of …
andersleung Jun 24, 2020
dce337f
Close out all iterators, request headers asynchronously, minor refact…
andersleung Jun 30, 2020
edda05c
Perform map insertion outside of future to avoid concurrent modification
andersleung Jun 30, 2020
3415528
Address PR comments
andersleung Jul 6, 2020
74f7d70
Fix test
andersleung Jul 6, 2020
890da67
WIP Start adding ReadsHtsgetDataSource tests
andersleung Jul 10, 2020
c0cc6ee
Merge branch 'master' into readsHtsgetDataSource
andersleung Jul 10, 2020
8afa04b
WIP fix broken tests
andersleung Jul 13, 2020
946bb4b
WIP Add tests for filtering duplicates, try 127.0.0.1 instead of loca…
andersleung Jul 13, 2020
100b0a4
WIP Try spawning sibling docker container for refserver
andersleung Jul 13, 2020
c6a36cf
WIP try 0.0.0.0:3000 instead of 127.0.0.1
andersleung Jul 13, 2020
1022cbe
WIP Specify 127.0.0.1 in port mapping when running docker
andersleung Jul 13, 2020
062d9ca
WIP configure refserver to listen on 0.0.0.0
andersleung Jul 13, 2020
f166712
WIP configure all IP addresses to 0.0.0.0:3000
andersleung Jul 14, 2020
37eb57d
WIP run test container with net=host
andersleung Jul 14, 2020
1e77370
Add comment to .travis.yml
andersleung Jul 14, 2020
f71ea84
Add end to end tests in PrintReads using htsget source
andersleung Jul 24, 2020
7967843
Merge branch 'master' into readsHtsgetDataSource
andersleung Jul 24, 2020
c437c29
WIP use htsjdk HtsgetBAMFileReader, refactor ReadsPathDataSource to u…
andersleung Aug 18, 2020
27e7dc7
Merge branch 'master' into readsHtsgetDataSource
andersleung Aug 18, 2020
a32a9e3
Add readme to htsgetScripts, move htsget_config.json
andersleung Aug 19, 2020
ea540d1
Remove ReadsHtsgetDataSource and GATK versions of htsget classes
andersleung Aug 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP use htsjdk HtsgetBAMFileReader, refactor ReadsPathDataSource to u…
…se GATKPath
  • Loading branch information
andersleung committed Aug 18, 2020
commit c437c2975eedd53acbb629acb9b15323efe0cb8f
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ before_install:
# http://docs.travis-ci.com/user/database-setup/#MySQL
- sudo /etc/init.d/mysql stop
- sudo /etc/init.d/postgresql stop
- sudo bash scripts/htsgetScripts/launchDocker.sh
- sudo bash scripts/htsgetScripts/launch-htsget-reference-server.sh

install:
- if [[ $TRAVIS_SECURE_ENV_VARS == false && $TEST_TYPE == cloud ]]; then
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ repositories {
mavenLocal()
}

final htsjdkVersion = System.getProperty('htsjdk.version','2.23.0')
final htsjdkVersion = System.getProperty('htsjdk.version','2.23.0-7-ge803eea-SNAPSHOT')
final picardVersion = System.getProperty('picard.version','2.22.8')
final barclayVersion = System.getProperty('barclay.version','3.0.0')
final sparkVersion = System.getProperty('spark.version', '2.4.5')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# this script starts a docker container running a GA4GH reference htsget server for use in htsget tests
WORKING_DIR=/home/travis/build/broadinstitute

docker pull ga4gh/htsget-ref:1.1.0
Expand All @@ -7,4 +8,4 @@ docker container run -d --name htsget-server -p 3000:3000 --env HTSGET_PORT=3000
./htsref -config /data/htsget_config.json
docker container ls -a

curl http://localhost:3000
curl http://localhost:3000
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import java.io.Serializable;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -66,6 +67,12 @@ public List<Path> getReadIndexPaths() {
return readIndices.stream().map(GATKPath::toPath).collect(Collectors.toList());
}

/**
 * Get the read index inputs as {@link GATKPath} specifiers rather than as resolved paths.
 *
 * @return an unmodifiable view of the read index list, or {@code null} when there are no
 *         read indices (the backing list is {@code null} or empty)
 */
public List<GATKPath> getReadIndexPathSpecifiers() {
    final boolean noIndices = readIndices == null || readIndices.isEmpty();
    if (noIndices) {
        // Callers receive null, not an empty list, when no indices were supplied.
        return null;
    }
    return Collections.unmodifiableList(readIndices);
}

/**
* Get the read validation stringency specified at the command line, or the default value if none was specified
* at the command line.
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/org/broadinstitute/hellbender/engine/GATKPath.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.*;
import java.nio.file.FileSystemNotFoundException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
Expand Down Expand Up @@ -39,6 +36,14 @@ public GATKPath(final String uriString) {
super(uriString);
}

/**
 * Create a GATKPath from a {@link File} by handing the file's pathname string to the
 * String-based superclass constructor.
 *
 * @param file the file to wrap; passing {@code null} results in a {@link NullPointerException}
 */
public GATKPath(final File file) {
    // File.toString() is specified to return getPath(); use the explicit accessor.
    super(file.getPath());
}

/**
 * Create a GATKPath from a {@link Path} by handing the path's string form to the
 * String-based superclass constructor.
 *
 * @param path the path to wrap; passing {@code null} results in a {@link NullPointerException}
 */
public GATKPath(final Path path) {
super(path.toString());
}

@Override
public Path toPath() {
// special case GCS, in case the filesystem provider wasn't installed properly but is available.
Expand Down
19 changes: 5 additions & 14 deletions src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -456,20 +456,11 @@ else if (hasCramInput()) {
factory = factory.enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
}

final Map<String, List<GATKPath>> pathsByScheme = readArguments.getReadPathSpecifiers().stream()
.collect(Collectors.groupingBy(path -> path.getURI().getScheme()));

final List<GATKPath> htsgetPaths = pathsByScheme.get(GATKPath.HTSGET_SCHEME);
if (htsgetPaths != null) {
if (htsgetPaths.size() != readArguments.getReadPathSpecifiers().size()) {
throw new UserException.UnimplementedFeature("A combination of htsget sources and other sources is currently not supported.");
}
reads = new ReadsHtsgetDataSource(pathsByScheme.get(GATKPath.HTSGET_SCHEME), factory);
} else {
reads = new ReadsPathDataSource(readArguments.getReadPaths(), readArguments.getReadIndexPaths(), factory, cloudPrefetchBuffer,
(cloudIndexPrefetchBuffer < 0 ? cloudPrefetchBuffer : cloudIndexPrefetchBuffer));
}

reads = new ReadsPathDataSource(
readArguments.getReadPathSpecifiers(), readArguments.getReadIndexPathSpecifiers(),
factory,
cloudPrefetchBuffer, (cloudIndexPrefetchBuffer < 0 ? cloudPrefetchBuffer : cloudIndexPrefetchBuffer)
);
}
else {
reads = null;
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public SAMFileHeader getHeader(final GATKPath filePathSpecifier, final GATKPath
.validationStringency(validationStringency)
.referenceSequence(cramReferencePathSpec == null ? null : referencePathSpecifier.toPath());
try (final ReadsDataSource readsDataSource =
new ReadsPathDataSource(Collections.singletonList(filePathSpecifier.toPath()), factory)) {
new ReadsPathDataSource(Collections.singletonList(filePathSpecifier), factory)) {
return readsDataSource.getHeader();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public Object[][] testCorrectRegionsHaveCorrectReadsAndSizeData() {
*/
@Test(dataProvider = "testCorrectRegionsHaveCorrectReadsAndSizeData")
public void testRegionsHaveCorrectReadsAndSize( final String reads, final String reference, final List<SimpleInterval> shardIntervals, final int minRegionSize, final int maxRegionSize, final int assemblyRegionPadding ) throws IOException {
try (final ReadsDataSource readsSource = new ReadsPathDataSource(IOUtils.getPath(reads));
try (final ReadsDataSource readsSource = new ReadsPathDataSource(new GATKPath(reads));
final ReferenceDataSource refSource = ReferenceDataSource.of(IOUtils.getPath(reference));
final ReferenceSequenceFile referenceReader = new CachingIndexedFastaSequenceFile(IOUtils.getPath(b37_reference_20_21));
) {
Expand Down Expand Up @@ -104,7 +104,7 @@ public void testRegionsHaveCorrectReadsAndSize( final String reads, final String
previousRead = currentRead;
}

try ( final ReadsDataSource innerReadsSource = new ReadsPathDataSource(IOUtils.getPath(reads)) ) {
try ( final ReadsDataSource innerReadsSource = new ReadsPathDataSource(new GATKPath(reads)) ) {
final List<GATKRead> regionExpectedReads = Lists.newArrayList(innerReadsSource.query(regionInterval)).stream().filter(combinedReadFilter).collect(Collectors.toList());

final List<GATKRead> actualNotInExpected = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public class MultiIntervalLocalReadShardUnitTest extends GATKBaseTest {

@DataProvider
public Object[][] shardBoundsTestData() {
final ReadsDataSource readsSource = new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
final ReadsDataSource readsSource = new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));

return new Object[][] {
// Shard, expected shard intervals, expected padded shard intervals
Expand Down Expand Up @@ -116,7 +116,7 @@ public void clearItems() {

@DataProvider
public Object[][] shardIterationTestData() {
final ReadsDataSource readsSource = new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
final ReadsDataSource readsSource = new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));

final ReadFilter keepReadBOnly = new ReadFilter() {
private static final long serialVersionUID = 1l;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,16 @@ public class PipelineSupportIntegrationTest extends GATKBaseTest {
@Test
// Asserting that Picard programs are able to pipe their output without errors
public void testPipeForPicardTools() {
File output = createTempFile("testOutput",".bam");
final File output = createTempFile("testOutput",".bam");
BaseTest.runProcess(ProcessController.getThreadLocal(), new String[]{"/bin/sh","-c"," ./gatk SortSam -I "+INPUT_BAM+" -O /dev/stdout -SO coordinate " +
"| ./gatk SetNmMdAndUqTags -I /dev/stdin -O "+ output.getAbsolutePath()+ " --CREATE_INDEX true -R "+b37_reference_20_21});//.split(" "));
try (ReadsDataSource inputReads = new ReadsPathDataSource(new File(INPUT_BAM).toPath());
ReadsDataSource outputReads = new ReadsPathDataSource(output.toPath())) {
try (final ReadsDataSource inputReads = new ReadsPathDataSource(new GATKPath(INPUT_BAM));
final ReadsDataSource outputReads = new ReadsPathDataSource(new GATKPath(output.toString()))) {
Assert.assertTrue(inputReads.iterator().hasNext());
Assert.assertTrue(outputReads.iterator().hasNext());
final int[] count = {0};
inputReads.forEach(r -> {
count[0]++;});
outputReads.forEach(r -> {
count[0]--;});
inputReads.forEach(r -> count[0]++);
outputReads.forEach(r -> count[0]--);
Assert.assertEquals(count[0],0);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public Object[][] getEmptyReadsContextData() {
{ new ReadsContext() },
{ new ReadsContext(null, null, null) },
{ new ReadsContext(null, new SimpleInterval("1", 1, 1), null ) },
{ new ReadsContext(new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")), null) }
{ new ReadsContext(new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")), null) }
};
}

Expand All @@ -45,12 +45,12 @@ public Object[][] getValidReadsContextData() {
readNameFilter.readName = "d";
return new Object[][]{
{new ReadsContext(
new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")),
new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")),
new SimpleInterval("1", 200, 210), // query over small interval, with no read filter
ReadFilterLibrary.ALLOW_ALL_READS), new String[] { "a", "b", "c" },
},
{new ReadsContext(
new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")),
new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam")),
new SimpleInterval("1", 200, 1000), // query over larger interval with readNameFilter on "d"
readNameFilter), new String[] { "d" }
}
Expand All @@ -69,7 +69,7 @@ public void testReadsContextFiltering(final ReadsContext readsContext, String[]

@Test
public void testIteratorOverDifferentInterval() {
final ReadsDataSource readsDataSource = new ReadsPathDataSource(IOUtils.getPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
final ReadsDataSource readsDataSource = new ReadsPathDataSource(new GATKPath(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam"));
final SimpleInterval originalInterval = new SimpleInterval("1", 200, 210);
final ReadsContext readsContext = new ReadsContext(readsDataSource, originalInterval, ReadFilterLibrary.ALLOW_ALL_READS);
final SimpleInterval otherInterval = new SimpleInterval("1", 276, 300);
Expand Down
Loading