diff --git a/Cargo.lock b/Cargo.lock index f6343dd7..19d28a2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -593,6 +593,29 @@ dependencies = [ "url", ] +[[package]] +name = "aws-sdk-secretsmanager" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2ac19e43e100834e7b9e6f838af7506a5cb8ee7531d1104cb207b3d927e0c2" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-sso" version = "1.44.0" @@ -762,6 +785,16 @@ dependencies = [ "aws-smithy-types", ] +[[package]] +name = "aws-smithy-mocks-experimental" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1069164f54cd37cdcf67e30f77ed996ccd71ad85344b9bb0412a1ca224617b" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", +] + [[package]] name = "aws-smithy-protocol-test" version = "0.62.0" @@ -2275,10 +2308,14 @@ name = "htsget-config" version = "0.11.0" dependencies = [ "async-trait", + "aws-config", + "aws-sdk-secretsmanager", + "aws-smithy-mocks-experimental", "cfg-if", "clap", "crypt4gh", "figment", + "futures-util", "http 1.1.0", "http-serde", "noodles", diff --git a/deploy/README.md b/deploy/README.md index 9fa71a32..bf92fdaa 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -16,17 +16,19 @@ The CDK code in this directory constructs a CDK app from [`HtsgetLambdaStack`][h These are general settings for the CDK deployment. -| Name | Description | Type | -|--------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------| -| `config` | The location of the htsget-rs server config. This must be specified. This config file configures the htsget-rs server. See [htsget-config] for a list of available server configuration options. | `string` | -| `domain` | The domain name for the Route53 Hosted Zone that the htsget-rs server will be under. This must be specified. A hosted zone with this name will either be looked up or created depending on the value of [`lookupHostedZone?`](#lookupHostedZone). | `string` | -| `authorizer` | Deployment options related to the authorizer. Note that this option allows specifying an AWS [JWT authorizer][jwt-authorizer]. The JWT authorizer automatically verifies tokens issued by a Cognito user pool. | [`HtsgetJwtAuthSettings`](#htsgetjwtauthsettings) | -| `subDomain?` | The domain name prefix to use for the htsget-rs server. Together with the [`domain`](#domain), this specifies url that the htsget-rs server will be reachable under. Defaults to `"htsget"`. | `string` | -| `s3BucketResources` | The buckets to serve data from. If this is not specified, this defaults to `[]`. This affects which buckets are allowed to be accessed by the policy actions which are `["s3:List*", "s3:Get*"]`. Note that this option does not create buckets, it only gives permission to access them, see the `createS3Buckets` option. This option must be specified to allow `htsget-rs` to access data in buckets that are not created in this stack. | `string[]` | -| `lookupHostedZone?` | Whether to lookup the hosted zone with the domain name. Defaults to `true`. If `true`, attempts to lookup an existing hosted zone using the domain name. Set this to `false` if you want to create a new hosted zone with the domain name. | `boolean` | -| `createS3Bucket?` | Whether to create a test bucket. Defaults to true. Buckets are created with [`RemovalPolicy.RETAIN`](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.RemovalPolicy.html). The correct access permissions are automatically added. | `boolean` | -| `bucketName?` | The name of the bucket created using `createS3Bucket`. The name defaults to an automatically generated CDK name, use this option to override that. This option only has an affect is `createS3Buckets` is true. | `string` | -| `copyTestData?` | Whether to copy test data into the bucket. Defaults to true. This copies the example data under the `data` directory to those buckets. This option only has an affect is `createS3Buckets` is true. | `boolean` | +| Name | Description | Type | +|---------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------| +| `config` | The location of the htsget-rs server config. This must be specified. This config file configures the htsget-rs server. See [htsget-config] for a list of available server configuration options. | `string` | +| `domain` | The domain name for the Route53 Hosted Zone that the htsget-rs server will be under. This must be specified. A hosted zone with this name will either be looked up or created depending on the value of [`lookupHostedZone?`](#lookupHostedZone). | `string` | +| `authorizer` | Deployment options related to the authorizer. Note that this option allows specifying an AWS [JWT authorizer][jwt-authorizer]. The JWT authorizer automatically verifies tokens issued by a Cognito user pool. | [`HtsgetJwtAuthSettings`](#htsgetjwtauthsettings) | +| `subDomain?` | The domain name prefix to use for the htsget-rs server. Together with the [`domain`](#domain), this specifies url that the htsget-rs server will be reachable under. Defaults to `"htsget"`. | `string` | +| `s3BucketResources` | The buckets to serve data from. If this is not specified, this defaults to `[]`. This affects which buckets are allowed to be accessed by the policy actions which are `["s3:List*", "s3:Get*"]`. Note that this option does not create buckets, it only gives permission to access them, see the `createS3Buckets` option. This option must be specified to allow `htsget-rs` to access data in buckets that are not created in this stack. | `string[]` | +| `lookupHostedZone?` | Whether to lookup the hosted zone with the domain name. Defaults to `true`. If `true`, attempts to lookup an existing hosted zone using the domain name. Set this to `false` if you want to create a new hosted zone with the domain name. | `boolean` | +| `createS3Bucket?` | Whether to create a test bucket. Defaults to true. Buckets are created with [`RemovalPolicy.RETAIN`](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.RemovalPolicy.html). The correct access permissions are automatically added. | `boolean` | +| `bucketName?` | The name of the bucket created using `createS3Bucket`. The name defaults to an automatically generated CDK name, use this option to override that. This option only has an affect is `createS3Buckets` is true. | `string` | +| `copyTestData?` | Whether to copy test data into the bucket. Defaults to true. This copies the example data under the `data` directory to those buckets. This option only has an affect is `createS3Buckets` is true. | `boolean` | +| `copyTestData?` | Whether to create secrets corresponding to C4GH public and private keys that can be used with C4GH storage. This copies the private and public keys in the data directory. Note that private keys copied here are visible in the CDK template. This is not considered secure and should only be used for test data. Real secrets should be manually provisioned or created outside the CDK template. Defaults to false. Secrets are created with [`RemovalPolicy.RETAIN`](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.RemovalPolicy.html). | `boolean` | +| `features?` | Additional features to compile htsget-rs with. Defaults to `[]`. `s3-storage` is always enabled. | `string[]` | #### HtsgetJwtAuthSettings diff --git a/deploy/bin/settings.ts b/deploy/bin/settings.ts index e28d4487..738ce79b 100644 --- a/deploy/bin/settings.ts +++ b/deploy/bin/settings.ts @@ -20,4 +20,7 @@ export const SETTINGS: HtsgetSettings = { // jwtAudience: ["audience"], // cogUserPoolId: "user-pool-id", }, + // Enable additional features for compiling htsget-rs. `s3-storage` is always enabled. + features: ["experimental"], + copyExampleKeys: true, }; diff --git a/deploy/config/example_deploy.toml b/deploy/config/example_deploy.toml index 39007774..bc4923a5 100644 --- a/deploy/config/example_deploy.toml +++ b/deploy/config/example_deploy.toml @@ -18,3 +18,8 @@ environment = "dev" regex = '^(?P.*?)/(?P.*)$' substitution_string = '$key' storage.backend = 'S3' + +[resolvers.storage.keys] +location = "SecretsManager" +private_key = "htsget-rs/c4gh-private-key" # pragma: allowlist secret +recipient_public_key = "htsget-rs/c4gh-recipient-public-key" \ No newline at end of file diff --git a/deploy/lib/htsget-lambda-stack.ts b/deploy/lib/htsget-lambda-stack.ts index fdd5b39b..8df6d6ff 100644 --- a/deploy/lib/htsget-lambda-stack.ts +++ b/deploy/lib/htsget-lambda-stack.ts @@ -6,6 +6,7 @@ import { CfnOutput, Duration, RemovalPolicy, + SecretValue, Stack, StackProps, Tags, @@ -42,6 +43,7 @@ import { BucketEncryption, } from "aws-cdk-lib/aws-s3"; import { BucketDeployment, Source } from "aws-cdk-lib/aws-s3-deployment"; +import { Secret } from "aws-cdk-lib/aws-secretsmanager"; /** * Settings related to the htsget lambda stack. @@ -101,6 +103,20 @@ export type HtsgetSettings = { * directory to those buckets. This option only has an affect is `createS3Buckets` is true. */ copyTestData?: boolean; + + /** + * Whether to create secrets corresponding to C4GH public and private keys that can be used with C4GH storage. + * This copies the private and public keys in the data directory. Note that private keys copied here are + * visible in the CDK template. This is not considered secure and should only be used for test data. Real secrets + * should be manually provisioned or created outside the CDK template. Defaults to false. Secrets are created + * with [`RemovalPolicy.RETAIN`](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.RemovalPolicy.html). + */ + copyExampleKeys?: boolean; + + /** + * Additional features to compile htsget-rs with. Defaults to `[]`. `s3-storage` is always enabled. + */ + features?: string[]; }; /** @@ -211,6 +227,31 @@ export class HtsgetLambdaStack extends Stack { new CfnOutput(this, "HtsgetBucketName", { value: bucket.bucketName }); } + if (settings.copyExampleKeys) { + const dataDir = path.join(__dirname, "..", "..", "data", "c4gh", "keys"); + const private_key = new Secret(this, "SecretPrivateKey", { + secretName: "htsget-rs/c4gh-private-key", // pragma: allowlist secret + secretStringValue: SecretValue.unsafePlainText( + readFileSync(path.join(dataDir, "bob.sec")).toString(), + ), + removalPolicy: RemovalPolicy.RETAIN, + }); + const public_key = new Secret(this, "SecretPublicKey", { + secretName: "htsget-rs/c4gh-recipient-public-key", // pragma: allowlist secret + secretStringValue: SecretValue.unsafePlainText( + readFileSync(path.join(dataDir, "alice.pub")).toString(), + ), + removalPolicy: RemovalPolicy.RETAIN, + }); + + lambdaRole.addToPolicy( + new PolicyStatement({ + actions: ["secretsmanager:GetSecretValue"], + resources: [private_key.secretArn, public_key.secretArn], + }), + ); + } + lambdaRole.addManagedPolicy( ManagedPolicy.fromAwsManagedPolicyName( "service-role/AWSLambdaBasicExecutionRole", @@ -218,6 +259,11 @@ export class HtsgetLambdaStack extends Stack { ); lambdaRole.addToPolicy(s3BucketPolicy); + let features = settings.features ?? []; + features = features + .filter((f) => f !== "s3-storage") + .concat(["s3-storage"]); + let htsgetLambda = new RustFunction(this, id + "Function", { manifestPath: path.join(__dirname, "..", ".."), binaryName: "htsget-lambda", @@ -227,7 +273,7 @@ export class HtsgetLambdaStack extends Stack { CARGO_PROFILE_RELEASE_LTO: "true", CARGO_PROFILE_RELEASE_CODEGEN_UNITS: "1", }, - cargoLambdaFlags: ["--features", "s3-storage"], + cargoLambdaFlags: ["--features", features.join(",")], }, memorySize: 128, timeout: Duration.seconds(28), diff --git a/htsget-config/Cargo.toml b/htsget-config/Cargo.toml index 9ea5c212..089158e8 100644 --- a/htsget-config/Cargo.toml +++ b/htsget-config/Cargo.toml @@ -11,9 +11,9 @@ homepage = "https://github.com/umccr/htsget-rs/blob/main/htsget-config/README.md repository = "https://github.com/umccr/htsget-rs" [features] -s3-storage = [] +s3-storage = ["dep:aws-sdk-secretsmanager", "dep:aws-config", "dep:tempfile"] url-storage = ["dep:reqwest", "dep:cfg-if"] -experimental = ["dep:crypt4gh"] +experimental = ["dep:crypt4gh", "dep:tokio", "dep:futures-util"] default = [] [dependencies] @@ -41,6 +41,13 @@ cfg-if = { version = "1", optional = true } # Crypt4GH crypt4gh = { version = "0.4", git = "https://github.com/EGA-archive/crypt4gh-rust", optional = true } +tokio = { version = "1", features = ["rt"], optional = true } +futures-util = { version = "0.3", optional = true } + +# Secrets manager +aws-sdk-secretsmanager = { version = "1", optional = true, features = ["test-util"] } +aws-config = { version = "1", optional = true } +tempfile = { version = "3", optional = true } [dev-dependencies] serde_json = "1" @@ -48,3 +55,4 @@ figment = { version = "0.10", features = ["test"] } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } tempfile = "3" rcgen = { version = "0.13", features = ["pem"] } +aws-smithy-mocks-experimental = "0.2" diff --git a/htsget-config/README.md b/htsget-config/README.md index f88bc89c..4e3be2d8 100644 --- a/htsget-config/README.md +++ b/htsget-config/README.md @@ -508,12 +508,12 @@ serving the data, htsget-rs will decrypt the headers of the Crypt4GH files and r them. When the client receives byte ranges from htsget-rs and concatenates them, the output bytes will be Crypt4GH encrypted, and will need to be decrypted before they can be read. All file formats (BAM, CRAM, VCF, and BCF) are supported using Crypt4GH. -To use this feature, additional config under `resolvers.storage` is required to specify the private and public keys: +To use this feature, set `location = 'Local'` under `resolvers.storage.keys` to specify the private and public keys: -| Option | Description | Type | Default | -|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| -| `private_key` | The path to private key which htsget-rs uses to decrypt Crypt4GH data. | Filesystem path | Not Set | -| `recipient_public_key` | The path to the public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. | Filesystem path | Not Set | +| Option | Description | Type | Default | +|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| +| `private_key` | The path to PEM formatted private key which htsget-rs uses to decrypt Crypt4GH data. | Filesystem path | Not Set | +| `recipient_public_key` | The path to the PEM formatted public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. | Filesystem path | Not Set | For example: @@ -522,12 +522,29 @@ For example: regex = '.*' substitution_string = '$0' -[resolvers.storage] -backend = 'Local' +[resolvers.storage.keys] +location = 'Local' private_key = 'data/c4gh/keys/bob.sec' # pragma: allowlist secret recipient_public_key = 'data/c4gh/keys/alice.pub' ``` +Keys can also be retrieved from [AWS Secrets Manager][secrets-manager]. Compile with the `s3-storage` feature flag and specify `location = 'SecretsManager'` under +`resolvers.storage.keys` to fetch keys from Secrets Manager. When using Secrets Manager, the `private_key` and `recipient_public_key` +correspond to ARNs or secret names in Secrets Manager storing PEM formatted keys. + +For example: + +```toml +[[resolvers]] +regex = '.*' +substitution_string = '$0' + +[resolvers.storage.keys] +location = 'SecretsManager' +private_key = 'private_key_secret_name' # pragma: allowlist secret +recipient_public_key = 'public_key_secret_name' +``` + The htsget-rs server expects the Crypt4GH file to end with `.c4gh`, and the index file to be unencrypted. See the [`data/c4gh`][data-c4gh] for examples of file structure. Any of the storage types are supported, i.e. `Local`, `S3`, or `Url`. @@ -557,4 +574,5 @@ This project is licensed under the [MIT license][license]. [license]: LICENSE [minio]: https://min.io/ [c4gh]: https://samtools.github.io/hts-specs/crypt4gh.pdf -[data-c4gh]: ../data/c4gh \ No newline at end of file +[data-c4gh]: ../data/c4gh +[secrets-manager]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html \ No newline at end of file diff --git a/htsget-config/examples/config-files/c4gh.toml b/htsget-config/examples/config-files/c4gh.toml index c17c36d3..4a350fec 100644 --- a/htsget-config/examples/config-files/c4gh.toml +++ b/htsget-config/examples/config-files/c4gh.toml @@ -11,5 +11,13 @@ substitution_string = "$0" [resolvers.storage] backend = 'Local' +[resolvers.storage.keys] +location = "Local" private_key = "data/c4gh/keys/bob.sec" # pragma: allowlist secret recipient_public_key = "data/c4gh/keys/alice.pub" + +# Or, use AWS secrets manager to store keys. +#[resolvers.storage.keys] +#location = "SecretsManager" +#private_key = "htsget/test_c4gh_private_key" # pragma: allowlist secret +#recipient_public_key = "htsget/test_c4gh_public_key" diff --git a/htsget-config/src/error.rs b/htsget-config/src/error.rs index 9c2d68f4..50eba8de 100644 --- a/htsget-config/src/error.rs +++ b/htsget-config/src/error.rs @@ -6,7 +6,7 @@ use thiserror::Error; pub type Result = result::Result; /// The error type for config. -#[derive(Error, Debug, PartialEq, Eq)] +#[derive(Error, Debug, Clone, PartialEq, Eq)] pub enum Error { #[error("io found: {0}")] IoError(String), diff --git a/htsget-config/src/resolver.rs b/htsget-config/src/resolver.rs index 93720595..dab3a434 100644 --- a/htsget-config/src/resolver.rs +++ b/htsget-config/src/resolver.rs @@ -692,12 +692,16 @@ mod tests { ); let resolver = config.resolvers().first().unwrap(); let expected_storage = S3::new("bucket".to_string(), None, false); + let Storage::S3(storage) = resolver.storage() else { + panic!(); + }; + + assert_eq!(storage.bucket(), expected_storage.bucket()); + assert_eq!(storage.endpoint(), expected_storage.endpoint()); + assert_eq!(storage.path_style(), expected_storage.path_style()); assert_eq!(resolver.regex().to_string(), "regex"); assert_eq!(resolver.substitution_string(), "substitution_string"); - assert!( - matches!(resolver.storage(), Storage::S3(s3_storage) if s3_storage == &expected_storage) - ); assert_eq!(resolver.allow_guard(), &allow_guard); }, ); diff --git a/htsget-config/src/storage/c4gh.rs b/htsget-config/src/storage/c4gh/local.rs similarity index 71% rename from htsget-config/src/storage/c4gh.rs rename to htsget-config/src/storage/c4gh/local.rs index 3f300e8c..c33c8f8a 100644 --- a/htsget-config/src/storage/c4gh.rs +++ b/htsget-config/src/storage/c4gh/local.rs @@ -1,35 +1,21 @@ -//! Crypt4GH key parsing. +//! Local C4GH key storage. //! -use crate::error::Error::ParseError; use crate::error::{Error, Result}; -use crypt4gh::error::Crypt4GHError; +use crate::storage::c4gh::C4GHKeys; use crypt4gh::keys::{get_private_key, get_public_key}; -use crypt4gh::Keys; use serde::Deserialize; use std::path::PathBuf; -/// Config for Crypt4GH keys. +/// Local C4GH key storage. #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] -#[serde(try_from = "C4GHPath")] -pub struct C4GHKeys { - keys: Vec, -} - -impl C4GHKeys { - /// Get the inner value. - pub fn into_inner(self) -> Vec { - self.keys - } -} - -#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] -pub struct C4GHPath { +pub struct C4GHLocal { private_key: PathBuf, recipient_public_key: PathBuf, } -impl C4GHPath { +impl C4GHLocal { + /// Create a new local C4GH key storage. pub fn new(private_key: PathBuf, recipient_public_key: PathBuf) -> Self { Self { private_key, @@ -38,26 +24,17 @@ impl C4GHPath { } } -impl TryFrom for C4GHKeys { +impl TryFrom for C4GHKeys { type Error = Error; - fn try_from(path: C4GHPath) -> Result { - let private_key = get_private_key(path.private_key, Ok("".to_string()))?; - let recipient_public_key = get_public_key(path.recipient_public_key)?; - - Ok(C4GHKeys { - keys: vec![Keys { - method: 0, - privkey: private_key, - recipient_pubkey: recipient_public_key, - }], - }) - } -} + fn try_from(local: C4GHLocal) -> Result { + let private_key = get_private_key(local.private_key, Ok("".to_string()))?; + let recipient_public_key = get_public_key(local.recipient_public_key)?; + + let handle = + tokio::spawn(async move { Ok(C4GHKeys::from_key_pair(private_key, recipient_public_key)) }); -impl From for Error { - fn from(err: Crypt4GHError) -> Self { - ParseError(err.to_string()) + Ok(C4GHKeys::from_join_handle(handle)) } } @@ -98,6 +75,9 @@ mod tests { [resolvers.storage] {} + + [resolvers.storage.keys] + location = "Local" private_key = "{}" recipient_public_key = "{}" "#, @@ -111,9 +91,8 @@ mod tests { }, ); } - - #[test] - fn config_local_storage_c4gh() { + #[tokio::test] + async fn config_local_storage_c4gh() { test_c4gh_storage_config(r#"backend = "Local""#, |config| { assert!(matches!( config.resolvers().first().unwrap().storage(), @@ -123,8 +102,8 @@ mod tests { } #[cfg(feature = "s3-storage")] - #[test] - fn config_s3_storage_c4gh() { + #[tokio::test] + async fn config_s3_storage_c4gh() { test_c4gh_storage_config( r#" backend = "S3" @@ -140,8 +119,8 @@ mod tests { } #[cfg(feature = "url-storage")] - #[test] - fn config_url_storage_c4gh() { + #[tokio::test] + async fn config_url_storage_c4gh() { test_c4gh_storage_config( r#" backend = "Url" diff --git a/htsget-config/src/storage/c4gh/mod.rs b/htsget-config/src/storage/c4gh/mod.rs new file mode 100644 index 00000000..b6d93800 --- /dev/null +++ b/htsget-config/src/storage/c4gh/mod.rs @@ -0,0 +1,84 @@ +//! Crypt4GH key parsing. +//! + +use crate::error::Error::{IoError, ParseError}; +use crate::error::{Error, Result}; +use crate::storage::c4gh::local::C4GHLocal; +#[cfg(feature = "s3-storage")] +use crate::storage::c4gh::secrets_manager::C4GHSecretsManager; +use crypt4gh::error::Crypt4GHError; +use futures_util::future::{BoxFuture, Shared}; +use futures_util::FutureExt; +use serde::Deserialize; +use tokio::task::{JoinError, JoinHandle}; + +pub mod local; + +#[cfg(feature = "s3-storage")] +pub mod secrets_manager; + +/// Config for Crypt4GH keys. +#[derive(Deserialize, Debug, Clone)] +#[serde(try_from = "Location")] +pub struct C4GHKeys { + // Store a cloneable future so that it can be resolved outside serde. + keys: Shared>>>, +} + +impl C4GHKeys { + /// Get the inner value. + pub async fn keys(self) -> Result> { + self.keys.await + } + + /// Construct the C4GH keys from a key pair. + pub fn from_key_pair(private_key: Vec, recipient_public_key: Vec) -> Vec { + vec![crypt4gh::Keys { + method: 0, + privkey: private_key, + recipient_pubkey: recipient_public_key, + }] + } + + pub fn from_join_handle(handle: JoinHandle>>) -> Self { + Self { + keys: handle.map(|value| value?).boxed().shared(), + } + } +} + +impl From for Error { + fn from(err: JoinError) -> Self { + IoError(err.to_string()) + } +} + +impl From for Error { + fn from(err: Crypt4GHError) -> Self { + ParseError(err.to_string()) + } +} + +impl TryFrom for C4GHKeys { + type Error = Error; + + fn try_from(location: Location) -> Result { + match location { + Location::Local(local) => local.try_into(), + #[cfg(feature = "s3-storage")] + Location::SecretsManager(secrets_manager) => secrets_manager.try_into(), + } + } +} + +/// The location of C4GH keys. +#[derive(Deserialize, Debug, Clone)] +#[serde(tag = "location", deny_unknown_fields)] +#[non_exhaustive] +pub enum Location { + #[serde(alias = "local", alias = "LOCAL")] + Local(C4GHLocal), + #[cfg(feature = "s3-storage")] + #[serde(alias = "secretsmanager", alias = "SECRETSMANAGER")] + SecretsManager(C4GHSecretsManager), +} diff --git a/htsget-config/src/storage/c4gh/secrets_manager.rs b/htsget-config/src/storage/c4gh/secrets_manager.rs new file mode 100644 index 00000000..030792e1 --- /dev/null +++ b/htsget-config/src/storage/c4gh/secrets_manager.rs @@ -0,0 +1,178 @@ +//! Obtain C4GH keys from AWS secrets manager. +//! + +use crate::error::Error::ParseError; +use crate::error::{Error, Result}; +use crate::storage::c4gh::C4GHKeys; +use aws_config::{load_defaults, BehaviorVersion}; +use aws_sdk_secretsmanager::error::SdkError; +use aws_sdk_secretsmanager::Client; +use crypt4gh::keys::{get_private_key, get_public_key}; +use crypt4gh::Keys; +use serde::Deserialize; +use std::fs; +use std::path::Path; +use tempfile::TempDir; + +/// C4GH secrets manager key storage. +#[derive(Deserialize, Debug, Clone)] +pub struct C4GHSecretsManager { + private_key: String, + recipient_public_key: String, + #[serde(skip)] + client: Option, +} + +impl C4GHSecretsManager { + /// Create a new C4GH secrets manager key storage. + pub fn new(private_key: String, recipient_public_key: String) -> Self { + Self { + private_key, + recipient_public_key, + client: None, + } + } + + /// Set the client. + pub fn with_client(mut self, client: Client) -> Self { + self.client = Some(client); + self + } + + /// Retrieve a binary secret from secrets manager. + pub async fn get_secret(client: &Client, id: impl Into) -> Result> { + let secret = client.get_secret_value().secret_id(id).send().await?; + + if let Some(secret) = secret.secret_binary { + Ok(secret.into_inner()) + } else if let Some(secret) = secret.secret_string { + Ok(secret.into_bytes()) + } else { + Err(ParseError("failed to get C4GH keys secret".to_string())) + } + } + + async fn write_to_file(to: &Path, secret: impl Into, client: &Client) -> Result<()> { + let data = Self::get_secret(client, secret).await?; + Ok(fs::write(to, data)?) + } + + /// Retrieve the C4GH keys from secrets manager. + pub async fn get_keys(self) -> Result> { + let client = if let Some(client) = self.client { + client + } else { + Client::new(&load_defaults(BehaviorVersion::latest()).await) + }; + + // Should not have to do this, but the Crypt4GH library expects a path. + let tmp = TempDir::new()?; + let private_key = tmp.path().join("private_key"); + Self::write_to_file(&private_key, self.private_key, &client).await?; + + let recipient_public_key = tmp.path().join("public_key"); + Self::write_to_file(&recipient_public_key, self.recipient_public_key, &client).await?; + + let private_key = get_private_key(private_key, Ok("".to_string()))?; + let recipient_public_key = get_public_key(recipient_public_key)?; + + Ok(C4GHKeys::from_key_pair(private_key, recipient_public_key)) + } +} + +impl From> for Error { + fn from(err: SdkError) -> Self { + Error::IoError(err.to_string()) + } +} + +impl TryFrom for C4GHKeys { + type Error = Error; + + fn try_from(secrets_manager: C4GHSecretsManager) -> Result { + Ok(C4GHKeys::from_join_handle(tokio::spawn( + secrets_manager.get_keys(), + ))) + } +} + +#[cfg(test)] +mod tests { + use aws_sdk_secretsmanager::operation::get_secret_value::GetSecretValueOutput; + use aws_sdk_secretsmanager::primitives::Blob; + use aws_smithy_mocks_experimental::{mock, mock_client, Rule, RuleMode}; + use std::fs::read; + use std::path::PathBuf; + + use super::*; + + async fn test_get_keys(rules: &[&Rule]) { + let client = mock_client!(aws_sdk_secretsmanager, RuleMode::Sequential, rules); + + let secrets_manager_config = C4GHSecretsManager::new( + "private_key".to_string(), + "recipient_public_key".to_string(), + ) + .with_client(client); + let keys: C4GHKeys = secrets_manager_config.try_into().unwrap(); + let keys = keys.keys().await.unwrap(); + + assert_eq!(keys.len(), 1); + } + + #[tokio::test] + async fn config_test_get_keys_string() { + let parent = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .to_path_buf(); + + let private_key = read(parent.join("data/c4gh/keys/bob.sec")).unwrap(); + let recipient_public_key = read(parent.join("data/c4gh/keys/alice.pub")).unwrap(); + + let get_private_key = mock!(Client::get_secret_value) + .match_requests(|req| req.secret_id() == Some("private_key")) + .then_output(move || { + GetSecretValueOutput::builder() + .secret_string(String::from_utf8(private_key.clone()).unwrap()) + .build() + }); + let get_recipient_public_key = mock!(Client::get_secret_value) + .match_requests(|req| req.secret_id() == Some("recipient_public_key")) + .then_output(move || { + GetSecretValueOutput::builder() + .secret_string(String::from_utf8(recipient_public_key.clone()).unwrap()) + .build() + }); + + test_get_keys(&[&get_private_key, &get_recipient_public_key]).await; + } + + #[tokio::test] + async fn config_test_get_keys_binary() { + let parent = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .to_path_buf(); + + let private_key = read(parent.join("data/c4gh/keys/bob.sec")).unwrap(); + let recipient_public_key = read(parent.join("data/c4gh/keys/alice.pub")).unwrap(); + + let get_private_key = mock!(Client::get_secret_value) + .match_requests(|req| req.secret_id() == Some("private_key")) + .then_output(move || { + GetSecretValueOutput::builder() + .secret_binary(Blob::new(private_key.clone())) + .build() + }); + let get_recipient_public_key = mock!(Client::get_secret_value) + .match_requests(|req| req.secret_id() == Some("recipient_public_key")) + .then_output(move || { + GetSecretValueOutput::builder() + .secret_binary(Blob::new(recipient_public_key.clone())) + .build() + }); + + test_get_keys(&[&get_private_key, &get_recipient_public_key]).await; + } +} diff --git a/htsget-config/src/storage/local.rs b/htsget-config/src/storage/local.rs index 3a8007bc..a514d916 100644 --- a/htsget-config/src/storage/local.rs +++ b/htsget-config/src/storage/local.rs @@ -17,7 +17,7 @@ fn default_local_path() -> String { default_path().into() } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Clone)] #[serde(default)] pub struct Local { scheme: Scheme, @@ -26,7 +26,7 @@ pub struct Local { local_path: String, path_prefix: String, use_data_server_config: bool, - #[serde(skip_serializing, flatten)] + #[serde(skip_serializing)] #[cfg(feature = "experimental")] keys: Option, } @@ -168,6 +168,13 @@ mod tests { true, ); - assert_eq!(result, expected); + assert_eq!(result.scheme(), expected.scheme()); + assert_eq!(result.authority(), expected.authority()); + assert_eq!(result.local_path(), expected.local_path()); + assert_eq!(result.path_prefix(), expected.path_prefix()); + assert_eq!( + result.use_data_server_config(), + expected.use_data_server_config() + ); } } diff --git a/htsget-config/src/storage/s3.rs b/htsget-config/src/storage/s3.rs index 9098fa37..5be71ca6 100644 --- a/htsget-config/src/storage/s3.rs +++ b/htsget-config/src/storage/s3.rs @@ -2,13 +2,13 @@ use crate::storage::c4gh::C4GHKeys; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone)] #[serde(default)] pub struct S3 { pub(crate) bucket: String, pub(crate) endpoint: Option, pub(crate) path_style: bool, - #[serde(skip_serializing, flatten)] + #[serde(skip_serializing)] #[cfg(feature = "experimental")] pub(crate) keys: Option, } @@ -31,12 +31,12 @@ impl S3 { } /// Get the endpoint - pub fn endpoint(self) -> Option { - self.endpoint + pub fn endpoint(&self) -> Option<&str> { + self.endpoint.as_deref() } /// Get the path style - pub fn path_style(self) -> bool { + pub fn path_style(&self) -> bool { self.path_style } diff --git a/htsget-config/src/storage/url.rs b/htsget-config/src/storage/url.rs index d5396dca..fe1dbbc1 100644 --- a/htsget-config/src/storage/url.rs +++ b/htsget-config/src/storage/url.rs @@ -27,7 +27,7 @@ pub struct UrlStorage { header_blacklist: Vec, #[serde(skip_serializing)] tls: TlsClientConfig, - #[serde(skip_serializing, flatten)] + #[serde(skip_serializing)] #[cfg(feature = "experimental")] keys: Option, } diff --git a/htsget-search/src/bam_search.rs b/htsget-search/src/bam_search.rs index 1fd83e3f..1fc2ac1b 100644 --- a/htsget-search/src/bam_search.rs +++ b/htsget-search/src/bam_search.rs @@ -151,11 +151,8 @@ pub(crate) mod tests { use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::{Class::Body, Class::Header, Headers, HtsGetError::NotFound, Response, Url}; - use htsget_config::storage::local::Local as ConfigLocalStorage; - use htsget_storage::local::LocalStorage; use htsget_test::http::concat::ConcatResponse; use std::future::Future; - use std::sync::Arc; #[cfg(feature = "experimental")] use { crate::from_storage::tests::with_local_storage_c4gh, @@ -169,7 +166,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_reads() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; println!("{response:#?}"); @@ -189,7 +186,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_unmapped_reads() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("*"); let response = search.search(query).await; @@ -216,7 +213,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr11() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11"); let response = search.search(query).await; @@ -240,7 +237,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr20() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -268,7 +265,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_with_seq_range() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000) @@ -304,7 +301,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000); @@ -333,7 +330,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_many_response_urls() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(4999976) @@ -368,7 +365,7 @@ pub(crate) mod tests { async fn search_no_gzi() { with_local_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000) @@ -401,7 +398,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -426,7 +423,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_no_mapped_reads() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("22"); let response = search.search(query).await; @@ -451,7 +448,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("25"); let response = search.search(query).await; @@ -468,7 +465,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -485,7 +482,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -503,7 +500,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -521,7 +518,7 @@ pub(crate) mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| async move { - let search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); @@ -543,7 +540,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; assert!(response.is_err()); @@ -561,7 +558,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -580,7 +577,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BamSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BamSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -598,7 +595,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = BamSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await.unwrap(); @@ -617,7 +614,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_range_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = BamSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") @@ -637,7 +634,7 @@ pub(crate) mod tests { pub(crate) async fn with_local_storage(test: F) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_local_storage_fn(test, DATA_LOCATION, &[]).await diff --git a/htsget-search/src/bcf_search.rs b/htsget-search/src/bcf_search.rs index 605c12b1..c52c373c 100644 --- a/htsget-search/src/bcf_search.rs +++ b/htsget-search/src/bcf_search.rs @@ -108,11 +108,9 @@ impl BcfSearch { #[cfg(test)] mod tests { - use htsget_config::storage::local::Local as ConfigLocalStorage; use htsget_config::types::Class::Body; use htsget_test::http::concat::ConcatResponse; use std::future::Future; - use std::sync::Arc; use super::*; #[cfg(feature = "s3-storage")] @@ -120,7 +118,6 @@ mod tests { use crate::from_storage::tests::with_local_storage_fn; use crate::search::SearchAll; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; - use htsget_storage::local::LocalStorage; #[cfg(feature = "experimental")] use { crate::from_storage::tests::with_local_storage_c4gh, @@ -135,7 +132,7 @@ mod tests { #[tokio::test] async fn search_all_variants() { with_local_storage(|storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf); let response = search.search(query).await; @@ -155,7 +152,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range() { with_local_storage(|storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let filename = "vcf-spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_reference_name("20"); let response = search.search(query).await; @@ -187,7 +184,7 @@ mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf) .with_reference_name("chrM") @@ -219,7 +216,7 @@ mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let filename = "vcf-spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -245,7 +242,7 @@ mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -262,7 +259,7 @@ mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -280,7 +277,7 @@ mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -297,7 +294,7 @@ mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chr1"); let response = search.search(query).await; @@ -314,7 +311,7 @@ mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| async move { - let search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); @@ -336,7 +333,7 @@ mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(response.is_err()); @@ -354,7 +351,7 @@ mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -373,7 +370,7 @@ mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -391,7 +388,7 @@ mod tests { #[tokio::test] async fn search_all_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = BcfSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("sample1-bcbio-cancer", Format::Bcf); let response = search.search(query).await.unwrap(); @@ -410,7 +407,7 @@ mod tests { #[tokio::test] async fn search_range_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = BcfSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("sample1-bcbio-cancer", Format::Bcf) .with_reference_name("chrM") @@ -429,9 +426,9 @@ mod tests { } async fn test_reference_sequence_with_seq_range( - storage: Arc>, + storage: Storage, ) -> Option<(String, ConcatResponse)> { - let mut search = BcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = BcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf) .with_reference_name("chrM") @@ -459,7 +456,7 @@ mod tests { async fn with_local_storage(test: F) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_local_storage_fn(test, "data/bcf", &[]).await diff --git a/htsget-search/src/cram_search.rs b/htsget-search/src/cram_search.rs index 10aeaf0d..07251f3f 100644 --- a/htsget-search/src/cram_search.rs +++ b/htsget-search/src/cram_search.rs @@ -274,7 +274,6 @@ impl CramSearch { mod tests { use std::future::Future; - use htsget_config::storage::local::Local as ConfigLocalStorage; use htsget_test::http::concat::ConcatResponse; use super::*; @@ -282,7 +281,6 @@ mod tests { use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; - use htsget_storage::local::LocalStorage; #[cfg(feature = "experimental")] use { crate::from_storage::tests::with_local_storage_c4gh, @@ -296,7 +294,7 @@ mod tests { #[tokio::test] async fn search_all_reads() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; println!("{response:#?}"); @@ -316,7 +314,7 @@ mod tests { #[tokio::test] async fn search_unmapped_reads() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("*"); let response = search.search(query).await; @@ -343,7 +341,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr11() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11"); let response = search.search(query).await; @@ -367,7 +365,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr20() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -395,7 +393,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_seq_range_no_overlap() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000) @@ -421,7 +419,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_seq_range_overlap() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000) @@ -440,7 +438,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_no_end_position() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000); @@ -469,7 +467,7 @@ mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -495,7 +493,7 @@ mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -512,7 +510,7 @@ mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -530,7 +528,7 @@ mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -549,7 +547,7 @@ mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; assert!(response.is_err()); @@ -567,7 +565,7 @@ mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -586,7 +584,7 @@ mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let mut search = CramSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = CramSearch::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -604,7 +602,7 @@ mod tests { #[tokio::test] async fn search_all_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = CramSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await.unwrap(); @@ -623,7 +621,7 @@ mod tests { #[tokio::test] async fn search_range_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = CramSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") @@ -643,7 +641,7 @@ mod tests { async fn with_local_storage(test: F) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_local_storage_fn(test, "data/cram", &[]).await diff --git a/htsget-search/src/from_storage.rs b/htsget-search/src/from_storage.rs index c05b9ef4..d2d54bc6 100644 --- a/htsget-search/src/from_storage.rs +++ b/htsget-search/src/from_storage.rs @@ -75,14 +75,14 @@ impl ResolveResponse for HtsGetFromStorage { #[cfg(feature = "s3-storage")] async fn from_s3(s3_storage: &S3StorageConfig, query: &Query) -> Result { let storage = Storage::from_s3(s3_storage).await; - let searcher = HtsGetFromStorage::new(storage); + let searcher = HtsGetFromStorage::new(storage?); searcher.search(query.clone()).await } #[cfg(feature = "url-storage")] async fn from_url(url_storage_config: &UrlStorageConfig, query: &Query) -> Result { let storage = Storage::from_url(url_storage_config).await; - let searcher = HtsGetFromStorage::new(storage); + let searcher = HtsGetFromStorage::new(storage?); searcher.search(query.clone()).await } } @@ -106,7 +106,6 @@ pub(crate) mod tests { use std::fs; use std::future::Future; use std::path::{Path, PathBuf}; - use std::sync::Arc; #[cfg(feature = "s3-storage")] use { htsget_storage::s3::S3Storage, htsget_test::aws_mocks::with_s3_test_server, std::fs::create_dir, @@ -137,8 +136,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_bam() { with_bam_local_storage(|storage| async move { - let storage = Arc::try_unwrap(storage).unwrap(); - let htsget = HtsGetFromStorage::new(Storage::new(storage)); + let htsget = HtsGetFromStorage::new(storage); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = htsget.search(query).await; println!("{response:#?}"); @@ -158,8 +156,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_vcf() { with_vcf_local_storage(|storage| async move { - let storage = Arc::try_unwrap(storage).unwrap(); - let htsget = HtsGetFromStorage::new(Storage::new(storage)); + let htsget = HtsGetFromStorage::new(storage); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf); let response = htsget.search(query).await; @@ -305,12 +302,12 @@ pub(crate) mod tests { pub(crate) async fn with_local_storage_fn(test: F, path: &str, copy_files: &[&str]) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_config_local_storage( |base_path, local_storage| async { - test(Arc::new( + test(Storage::new( LocalStorage::new(base_path, local_storage).unwrap(), )) .await @@ -324,12 +321,12 @@ pub(crate) mod tests { #[cfg(feature = "experimental")] pub(crate) async fn with_local_storage_c4gh(test: F) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_config_local_storage_map( |base_path, local_storage| async { - test(Arc::new( + test(Storage::new( LocalStorage::new(base_path, local_storage).unwrap(), )) .await @@ -344,7 +341,7 @@ pub(crate) mod tests { #[cfg(feature = "s3-storage")] pub(crate) async fn with_aws_storage_fn(test: F, path: &str, copy_files: &[&str]) where - F: FnOnce(Arc) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { let tmp_dir = TempDir::new().unwrap(); @@ -367,11 +364,11 @@ pub(crate) mod tests { #[cfg(feature = "s3-storage")] pub(crate) async fn with_aws_s3_storage_fn(test: F, folder_name: String, base_path: &Path) where - F: FnOnce(Arc) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future, { with_s3_test_server(base_path, |client| async move { - test(Arc::new(S3Storage::new(client, folder_name))).await; + test(Storage::new(S3Storage::new(client, folder_name))).await; }) .await; } diff --git a/htsget-search/src/vcf_search.rs b/htsget-search/src/vcf_search.rs index 8508c067..032e0c27 100644 --- a/htsget-search/src/vcf_search.rs +++ b/htsget-search/src/vcf_search.rs @@ -119,11 +119,9 @@ impl VcfSearch { #[cfg(test)] pub(crate) mod tests { - use htsget_config::storage::local::Local as ConfigLocalStorage; use htsget_config::types::Class::Body; use htsget_test::http::concat::ConcatResponse; use std::future::Future; - use std::sync::Arc; use super::*; #[cfg(feature = "s3-storage")] @@ -131,7 +129,6 @@ pub(crate) mod tests { use crate::from_storage::tests::with_local_storage_fn; use crate::search::SearchAll; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; - use htsget_storage::local::LocalStorage; #[cfg(feature = "experimental")] use { crate::from_storage::tests::with_local_storage_c4gh, @@ -146,7 +143,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_variants() { with_local_storage(|storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf); let response = search.search(query).await; @@ -166,7 +163,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range() { with_local_storage(|storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf).with_reference_name("20"); let response = search.search(query).await; @@ -196,7 +193,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf) .with_reference_name("chrM") @@ -232,7 +229,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf).with_class(Header); let response = search.search(query).await; @@ -258,7 +255,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -275,7 +272,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -293,7 +290,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -309,7 +306,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chr1"); let response = search.search(query).await; @@ -326,7 +323,7 @@ pub(crate) mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| async move { - let search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let index = search.read_index(&query).await.unwrap(); @@ -347,7 +344,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); let response = search.search(query).await; assert!(response.is_err()); @@ -365,7 +362,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -384,7 +381,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let response = search.search(query).await; assert!(response.is_err()); @@ -401,7 +398,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = VcfSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); let response = search.search(query).await.unwrap(); @@ -417,7 +414,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_range_c4gh() { with_local_storage_c4gh(|storage| async move { - let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let storage = C4GHStorage::new(get_decryption_keys().await, storage); let mut search = VcfSearch::new(Storage::new(storage)); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf) .with_reference_name("20") @@ -433,9 +430,9 @@ pub(crate) mod tests { } async fn test_reference_name_with_seq_range( - storage: Arc>, + storage: Storage, ) -> Option<(String, ConcatResponse)> { - let mut search = VcfSearch::new(Storage::new(Arc::try_unwrap(storage).unwrap())); + let mut search = VcfSearch::new(storage); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf) .with_reference_name("chrM") @@ -463,7 +460,7 @@ pub(crate) mod tests { pub(crate) async fn with_local_storage(test: F) where - F: FnOnce(Arc>) -> Fut, + F: FnOnce(Storage) -> Fut, Fut: Future>, { with_local_storage_fn(test, "data/vcf", &[]).await diff --git a/htsget-storage/Cargo.toml b/htsget-storage/Cargo.toml index 1c214b8b..576cc1ce 100644 --- a/htsget-storage/Cargo.toml +++ b/htsget-storage/Cargo.toml @@ -34,7 +34,7 @@ http = "1" cfg-if = "1" # Async -tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +tokio = { version = "1", features = ["macros", "rt-multi-thread", "io-util"] } tokio-util = { version = "0.7", features = ["io", "compat"] } futures = { version = "0.3" } futures-util = "0.3" diff --git a/htsget-storage/src/c4gh/edit.rs b/htsget-storage/src/c4gh/edit.rs index 4107cbb8..d610e788 100644 --- a/htsget-storage/src/c4gh/edit.rs +++ b/htsget-storage/src/c4gh/edit.rs @@ -165,7 +165,7 @@ impl<'a> EditHeader<'a> { /// Add edit lists and return a header packet. pub fn reencrypt_header(self) -> Result
{ - if self.current_header.contains_edit_list { + if self.current_header.contains_edit_list() { return Err(StorageError::IoError( "edit lists already exist".to_string(), io::Error::other(Crypt4GHError::TooManyEditListPackets), @@ -242,7 +242,7 @@ mod tests { src.read_to_end(&mut buf).unwrap(); let mut buf = BufReader::new(Cursor::new(buf)); - let keys = get_decryption_keys(); + let keys = get_decryption_keys().await; let edit = EditHeader::new( test_unencrypted_positions(), diff --git a/htsget-storage/src/c4gh/mod.rs b/htsget-storage/src/c4gh/mod.rs index 48857a7b..1f615bb7 100644 --- a/htsget-storage/src/c4gh/mod.rs +++ b/htsget-storage/src/c4gh/mod.rs @@ -24,8 +24,7 @@ pub struct DeserializedHeader { pub(crate) header_info: HeaderInfo, pub(crate) session_keys: Vec>, pub(crate) header_size: u64, - pub(crate) contains_edit_list: bool, - pub(crate) decrypted_stream: Vec, + pub(crate) edit_list: Option>, } impl Clone for DeserializedHeader { @@ -38,8 +37,7 @@ impl Clone for DeserializedHeader { }, session_keys: self.session_keys.clone(), header_size: self.header_size, - contains_edit_list: self.contains_edit_list, - decrypted_stream: self.decrypted_stream.clone(), + edit_list: self.edit_list.clone(), } } } @@ -50,24 +48,19 @@ impl DeserializedHeader { header_info: HeaderInfo, session_keys: Vec>, header_size: u64, - contains_edit_list: bool, - decrypted_stream: Vec, + edit_list: Option>, ) -> Self { Self { header_info, session_keys, header_size, - contains_edit_list, - decrypted_stream, + edit_list, } } /// Grab all the required information from the header. /// This is more or less directly copied from https://github.com/EGA-archive/crypt4gh-rust/blob/2d41a1770067003bc67ab499841e0def186ed218/src/lib.rs#L283-L314 - pub fn from_buffer( - read_buffer: &mut R, - keys: &[Keys], - ) -> Result { + pub fn from_buffer(read_buffer: &mut R, keys: &[Keys]) -> Result { // Get header info let mut temp_buf = [0_u8; 16]; // Size of the header read_buffer @@ -113,16 +106,40 @@ impl DeserializedHeader { } = header::deconstruct_header_body(encrypted_packets, keys, &None)?; let header_size = 16 + header_lengths; - let contains_header = edit_list_packet.is_some(); + Ok(DeserializedHeader::new( + header_info, + session_keys, + header_size as u64, + edit_list_packet, + )) + } + + /// Check if an edit list is present. + pub fn contains_edit_list(&self) -> bool { + self.edit_list.is_some() + } +} + +/// Represents the decrypted data from a C4GH file. +#[derive(Debug, Clone)] +pub struct DecryptedData(Vec); + +impl DecryptedData { + /// Decrypt the data from the header and read buffer. The read buffer is expected to be + /// positioned at the start of the encrypted data. + pub fn from_header( + read_buffer: &mut R, + header: DeserializedHeader, + ) -> Result { let mut writer = BufWriter::new(Cursor::new(vec![])); let mut write_info = WriteInfo::new(0, None, &mut writer); - match edit_list_packet { - None => body_decrypt(read_buffer, &session_keys, &mut write_info, 0)?, + match header.edit_list { + None => body_decrypt(read_buffer, &header.session_keys, &mut write_info, 0)?, Some(edit_list_content) => body_decrypt_parts( read_buffer, - session_keys.clone(), + header.session_keys, write_info, edit_list_content, )?, @@ -133,13 +150,12 @@ impl DeserializedHeader { .map_err(|err| Crypt4GHError::IoError(io::Error::other(err)))? .into_inner(); - Ok(DeserializedHeader::new( - header_info, - session_keys, - header_size as u64, - contains_header, - data, - )) + Ok(Self(data)) + } + + /// Get the inner data. + pub fn into_inner(self) -> Vec { + self.0 } } diff --git a/htsget-storage/src/c4gh/storage.rs b/htsget-storage/src/c4gh/storage.rs index 6eeea139..9076de6d 100644 --- a/htsget-storage/src/c4gh/storage.rs +++ b/htsget-storage/src/c4gh/storage.rs @@ -4,7 +4,7 @@ use crate::c4gh::edit::{ClampedPosition, EditHeader, UnencryptedPosition}; use crate::c4gh::{ to_unencrypted_file_size, unencrypted_clamp, unencrypted_clamp_next, unencrypted_to_data_block, - unencrypted_to_next_data_block, DeserializedHeader, + unencrypted_to_next_data_block, DecryptedData, DeserializedHeader, }; use crate::error::StorageError::{InternalError, IoError}; use crate::error::{Result, StorageError}; @@ -17,10 +17,11 @@ use async_trait::async_trait; use crypt4gh::error::Crypt4GHError; use crypt4gh::Keys; use htsget_config::types::{Class, Format, Url}; +use std::cmp::min; use std::collections::HashMap; use std::fmt::{Debug, Formatter}; use std::io; -use std::io::{BufReader, Cursor}; +use std::io::{BufReader, Cursor, Read}; use tokio::io::AsyncReadExt; /// Max C4GH header size in bytes. Supports 50 regular sized encrypted packets. 16 + (108 * 50). @@ -33,6 +34,7 @@ pub struct C4GHState { encrypted_file_size: u64, unencrypted_file_size: u64, deserialized_header: DeserializedHeader, + decrypted_data: DecryptedData, } /// Implementation for the [StorageTrait] trait using the local file system for accessing Crypt4GH @@ -92,7 +94,7 @@ impl C4GHStorage { .clone(); Ok(Streamable::from_async_read(Cursor::new( - data.deserialized_header.decrypted_stream, + data.decrypted_data.into_inner(), ))) } @@ -111,33 +113,58 @@ impl C4GHStorage { // Get the file size. let encrypted_file_size = self.inner.head(&key, (&options).into()).await?; - let end = options - .range - .end - .unwrap_or_default() - .checked_add(MAX_C4GH_HEADER_SIZE) - .ok_or_else(|| InternalError("overflow getting header".to_string()))?; - options.range = options.range.with_end(end); + let mut c4gh_header_options = options.clone(); + c4gh_header_options.range.end = Some(min(MAX_C4GH_HEADER_SIZE, encrypted_file_size)); // Also need to determine the header size. let mut buf = vec![]; self .inner - .get(&key, options) + .get(&key, c4gh_header_options) .await? + .take(MAX_C4GH_HEADER_SIZE) .read_to_end(&mut buf) .await?; - let mut reader = BufReader::new(Cursor::new(buf)); + let mut reader = BufReader::new(buf.as_slice()); let deserialized_header = DeserializedHeader::from_buffer(&mut reader, &self.keys)?; let unencrypted_file_size = to_unencrypted_file_size(encrypted_file_size, deserialized_header.header_size); + // Grab remaining bytes after knowing the header size. + let mut remaining = vec![]; + + if encrypted_file_size > MAX_C4GH_HEADER_SIZE { + let end = unencrypted_to_next_data_block( + options.range.end.unwrap_or(encrypted_file_size), + deserialized_header.header_size, + encrypted_file_size, + ); + options.range.start = Some(MAX_C4GH_HEADER_SIZE); + + if end < MAX_C4GH_HEADER_SIZE { + options.range.end = None; + } else { + options.range.end = Some(min(end, encrypted_file_size)); + } + + self + .inner + .get(&key, options) + .await? + .read_to_end(&mut remaining) + .await?; + } + + let mut reader = reader.chain(BufReader::new(remaining.as_slice())); + + let decrypted_data = DecryptedData::from_header(&mut reader, deserialized_header.clone())?; let state = C4GHState { encrypted_file_size, unencrypted_file_size, deserialized_header, + decrypted_data, }; self.state.insert(key, state); @@ -297,8 +324,6 @@ mod tests { use http::HeaderMap; use std::future::Future; use std::path::Path; - #[cfg(feature = "s3-storage")] - use std::sync::Arc; use tokio::fs::{read, File}; use tokio::io::AsyncWriteExt; @@ -434,7 +459,7 @@ mod tests { with_s3_c4gh_storage(|mut storage| async move { test_range_url( &mut storage, - "http://folder.localhost:8014/key.c4gh", + "http://folder.localhost:0/key.c4gh", "key", &Default::default(), ) @@ -578,7 +603,7 @@ mod tests { { with_local_storage(|storage, base_path| async move { create_encrypted_files(&base_path).await; - test(C4GHStorage::new(get_decryption_keys(), storage)).await; + test(C4GHStorage::new(get_decryption_keys().await, storage)).await; }) .await; } @@ -591,11 +616,7 @@ mod tests { { with_aws_s3_storage(|storage, base_path| async move { create_encrypted_files(&base_path).await; - test(C4GHStorage::new( - get_decryption_keys(), - Arc::try_unwrap(storage).unwrap(), - )) - .await; + test(C4GHStorage::new(get_decryption_keys().await, storage)).await; }) .await; } @@ -608,7 +629,7 @@ mod tests { { with_url_test_server(|storage, url, base_path| async move { create_encrypted_files(&base_path).await; - test(C4GHStorage::new(get_decryption_keys(), storage), url).await; + test(C4GHStorage::new(get_decryption_keys().await, storage), url).await; }) .await; } diff --git a/htsget-storage/src/lib.rs b/htsget-storage/src/lib.rs index c3470086..8e97e9e5 100644 --- a/htsget-storage/src/lib.rs +++ b/htsget-storage/src/lib.rs @@ -139,14 +139,18 @@ impl StorageTrait for Storage { impl Storage { #[cfg(feature = "experimental")] /// Wrap an existing storage with C4GH storage - pub fn from_c4gh_keys(keys: Option<&C4GHKeys>, storage: Storage) -> Storage { + pub async fn from_c4gh_keys(keys: Option<&C4GHKeys>, storage: Storage) -> Result { if let Some(keys) = keys { - Storage::new(C4GHStorage::new_box( - keys.clone().into_inner(), + Ok(Storage::new(C4GHStorage::new_box( + keys + .clone() + .keys() + .await + .map_err(|err| StorageError::InternalError(err.to_string()))?, storage.into_inner(), - )) + ))) } else { - storage + Ok(storage) } } @@ -159,7 +163,7 @@ impl Storage { cfg_if! { if #[cfg(feature = "experimental")] { - Ok(Self::from_c4gh_keys(local_storage.keys(), storage)) + Self::from_c4gh_keys(local_storage.keys(), storage).await } else { Ok(storage) } @@ -168,28 +172,28 @@ impl Storage { /// Create from s3 config. #[cfg(feature = "s3-storage")] - pub async fn from_s3(s3_storage: &S3StorageConfig) -> Storage { + pub async fn from_s3(s3_storage: &S3StorageConfig) -> Result { let storage = Storage::new( S3Storage::new_with_default_config( s3_storage.bucket().to_string(), - s3_storage.clone().endpoint(), - s3_storage.clone().path_style(), + s3_storage.endpoint().map(str::to_string), + s3_storage.path_style(), ) .await, ); cfg_if! { if #[cfg(feature = "experimental")] { - Self::from_c4gh_keys(s3_storage.keys(), storage) + Self::from_c4gh_keys(s3_storage.keys(), storage).await } else { - storage + Ok(storage) } } } /// Create from url config. #[cfg(feature = "url-storage")] - pub async fn from_url(url_storage: &UrlStorageConfig) -> Storage { + pub async fn from_url(url_storage: &UrlStorageConfig) -> Result { let storage = Storage::new(UrlStorage::new( url_storage.client_cloned(), url_storage.url().clone(), @@ -200,9 +204,9 @@ impl Storage { cfg_if! { if #[cfg(feature = "experimental")] { - Self::from_c4gh_keys(url_storage.keys(), storage) + Self::from_c4gh_keys(url_storage.keys(), storage).await } else { - storage + Ok(storage) } } } diff --git a/htsget-storage/src/local.rs b/htsget-storage/src/local.rs index f94e8e32..1aacbeb1 100644 --- a/htsget-storage/src/local.rs +++ b/htsget-storage/src/local.rs @@ -2,7 +2,7 @@ //! use std::fmt::Debug; -use std::io::ErrorKind; +use std::io::{ErrorKind, SeekFrom}; use std::path::{Path, PathBuf}; use crate::{HeadOptions, StorageMiddleware, StorageTrait, UrlFormatter}; @@ -10,6 +10,7 @@ use crate::{Streamable, Url as HtsGetUrl}; use async_trait::async_trait; use tokio::fs; use tokio::fs::File; +use tokio::io::AsyncSeekExt; use tracing::debug; use tracing::instrument; use url::Url; @@ -84,9 +85,16 @@ impl StorageMiddleware for LocalStorage StorageTrait for LocalStorage { /// Get the file at the location of the key. #[instrument(level = "debug", skip(self))] - async fn get(&self, key: &str, _options: GetOptions<'_>) -> Result { + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result { debug!(calling_from = ?self, key = key, "getting file with key {:?}", key); - Ok(Streamable::from_async_read(self.get(key).await?)) + + // Need to ensure range options are considered for local files. + let mut file = self.get(key).await?; + file + .seek(SeekFrom::Start(options.range.start.unwrap_or(0))) + .await?; + + Ok(Streamable::from_async_read(file)) } /// Get a url for the file at key. diff --git a/htsget-storage/src/s3.rs b/htsget-storage/src/s3.rs index 79d9fff5..7bcab6a2 100644 --- a/htsget-storage/src/s3.rs +++ b/htsget-storage/src/s3.rs @@ -292,7 +292,6 @@ impl StorageTrait for S3Storage { pub(crate) mod tests { use std::future::Future; use std::path::{Path, PathBuf}; - use std::sync::Arc; use htsget_test::aws_mocks::with_s3_test_server; @@ -305,22 +304,18 @@ pub(crate) mod tests { pub(crate) async fn with_aws_s3_storage_fn(test: F, folder_name: String, base_path: &Path) where - F: FnOnce(Arc, PathBuf) -> Fut, + F: FnOnce(S3Storage, PathBuf) -> Fut, Fut: Future, { with_s3_test_server(base_path, |client| async move { - test( - Arc::new(S3Storage::new(client, folder_name)), - base_path.to_path_buf(), - ) - .await; + test(S3Storage::new(client, folder_name), base_path.to_path_buf()).await; }) .await; } pub(crate) async fn with_aws_s3_storage(test: F) where - F: FnOnce(Arc, PathBuf) -> Fut, + F: FnOnce(S3Storage, PathBuf) -> Fut, Fut: Future, { let (folder_name, base_path) = create_local_test_files().await; @@ -365,7 +360,7 @@ pub(crate) mod tests { ) .await .unwrap(); - assert!(result.url.starts_with("http://folder.localhost:8014/key2")); + assert!(result.url.starts_with("http://folder.localhost:0/key2")); assert!(result.url.contains(&format!( "Amz-Expires={}", S3Storage::PRESIGNED_REQUEST_EXPIRY @@ -387,7 +382,7 @@ pub(crate) mod tests { ) .await .unwrap(); - assert!(result.url.starts_with("http://folder.localhost:8014/key2")); + assert!(result.url.starts_with("http://folder.localhost:0/key2")); assert!(result.url.contains(&format!( "Amz-Expires={}", S3Storage::PRESIGNED_REQUEST_EXPIRY @@ -411,7 +406,7 @@ pub(crate) mod tests { ) .await .unwrap(); - assert!(result.url.starts_with("http://folder.localhost:8014/key2")); + assert!(result.url.starts_with("http://folder.localhost:0/key2")); assert!(result.url.contains(&format!( "Amz-Expires={}", S3Storage::PRESIGNED_REQUEST_EXPIRY diff --git a/htsget-test/src/aws_mocks.rs b/htsget-test/src/aws_mocks.rs index a70a0cc1..f531414f 100644 --- a/htsget-test/src/aws_mocks.rs +++ b/htsget-test/src/aws_mocks.rs @@ -10,9 +10,10 @@ use std::future::Future; use std::path::{Path, PathBuf}; use tempfile::TempDir; -/// Default domain to use for mock s3 server -pub const DEFAULT_DOMAIN_NAME: &str = "localhost:8014"; -/// Default region to use for mock s3 server +/// Default domain to use for mock s3 server. +pub const DEFAULT_DOMAIN_NAME: &str = "localhost:0"; + +/// Default region to use for mock s3 server. pub const DEFAULT_REGION: &str = "ap-southeast-2"; /// Run a mock s3 server using the `server_base_path` and a test function. Specify the domain name and region to use for the mock server. diff --git a/htsget-test/src/c4gh.rs b/htsget-test/src/c4gh.rs index cb402764..1489b34f 100644 --- a/htsget-test/src/c4gh.rs +++ b/htsget-test/src/c4gh.rs @@ -1,7 +1,7 @@ use crate::util::default_dir; use crypt4gh::keys::{get_private_key, get_public_key}; use crypt4gh::{decrypt, encrypt, Keys}; -use htsget_config::storage::c4gh::{C4GHKeys, C4GHPath}; +use htsget_config::storage::c4gh::{local::C4GHLocal, C4GHKeys}; use std::collections::HashSet; use std::io::{BufReader, BufWriter, Cursor}; @@ -59,10 +59,10 @@ pub fn encrypt_data(data: &[u8]) -> Vec { writer.into_inner().unwrap().into_inner() } -pub fn get_decryption_keys() -> Vec { +pub async fn get_decryption_keys() -> Vec { let private_key = default_dir().join("data/c4gh/keys/bob.sec"); let public_key = default_dir().join("data/c4gh/keys/alice.pub"); - let keys = C4GHKeys::try_from(C4GHPath::new(private_key, public_key)).unwrap(); + let keys = C4GHKeys::try_from(C4GHLocal::new(private_key, public_key)).unwrap(); - keys.into_inner() + keys.keys().await.unwrap() }