diff --git a/adr/20250929-repository-directory-traversal.md b/adr/20250929-repository-directory-traversal.md new file mode 100644 index 0000000000..e24d3157d9 --- /dev/null +++ b/adr/20250929-repository-directory-traversal.md @@ -0,0 +1,84 @@ + # ADR: Repository Directory Traversal API + +**Date**: 2025-09-29 +**Status**: Accepted +**Context**: Need for standardized directory listing across Git hosting providers + +## Decision + +Introduce a `listDirectory(String path, int depth)` method to the `RepositoryProvider` abstraction to enable unified directory traversal across different Git hosting platforms. + +## Context + +Nextflow requires the ability to explore repository directory structures across multiple Git hosting providers (GitHub, GitLab, Bitbucket, Azure DevOps, Gitea) without full repository clones. Each provider has different API capabilities and constraints for directory listing operations. + +## Technical Implementation + +### Core Algorithm + +All providers follow a consistent pattern: +1. **Path Resolution**: Normalize path to provider API format +2. **Strategy Selection**: Choose recursive vs iterative approach based on API capabilities +3. **HTTP Request**: Execute provider-specific API calls +4. **Response Processing**: Parse to standardized `RepositoryEntry` objects +5. 
**Depth Filtering**: Apply client-side limits when APIs lack precise depth control + +### API Strategy Classification + +**Strategy A: Native Recursive (GitHub, GitLab, Azure)** +- Single HTTP request with recursive parameters +- Server-side tree traversal +- Performance: O(1) API calls + +**Strategy B: Iterative Traversal (Gitea)** +- Multiple HTTP requests per directory level +- Client-side recursion management +- Performance: O(n) API calls where n = number of directories + +**Strategy C: Limited Support (Bitbucket Cloud, Bitbucket Server)** +- Bitbucket Cloud: single-level listing only, via one request to the `src` endpoint (no recursion into subdirectories) +- Bitbucket Server: `listDirectory` is not implemented yet and throws `UnsupportedOperationException` + +### Provider Implementation Details + +| Provider | Endpoint | Recursive Support | Performance | +|----------|----------|-------------------|-------------| +| GitHub | `/git/trees/{sha}?recursive=1` | Native | Optimal | +| GitLab | `/repository/tree?recursive=true` | Native | Optimal | +| Azure | `/items?recursionLevel=Full` | Native | Optimal | +| Bitbucket Server | `/browse/{path}` | Not implemented | Unsupported | +| Gitea | `/contents/{path}` | Manual iteration | Multiple calls | +| Bitbucket Cloud | `/src/{commit}/{path}` | None | Single level only | + +### HTTP API Constraints + +- **Rate Limiting**: 60-5000 requests/hour depending on provider and authentication +- **Response Size**: Controlled by `NXF_GIT_RESPONSE_MAX_LENGTH` environment variable +- **Timeouts**: 60-second connect timeout across all providers +- **Authentication**: Required for private repositories and higher rate limits + +## Consequences + +### Positive +- **Unified Interface**: Consistent API across all Git hosting providers +- **Performance Optimization**: Uses native recursive APIs where available +- **Graceful Degradation**: Falls back to iterative traversal when needed +- **Error Resilience**: Handles partial failures and API limitations + +### Negative +- **Provider Inconsistency**: Performance varies significantly between providers +- **API Rate Limits**: Multiple calls required
for some providers may hit limits faster +- **Memory Usage**: Large directory structures loaded entirely into memory + +### Neutral +- **Complexity**: Abstraction layer adds code complexity but improves maintainability +- **Testing**: Comprehensive test coverage required for each provider implementation + +## Implementation Notes + +- Local Git repositories use JGit TreeWalk for optimal performance +- Client-side depth filtering ensures consistent behavior across providers +- Error handling varies by provider: some return empty lists, others throw exceptions +- Future enhancements could include caching based on commit SHA and pagination support + +This decision enables Nextflow to efficiently explore repository structures regardless of the underlying Git hosting platform, with automatic optimization based on each provider's API capabilities. \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/AzureRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/AzureRepositoryProvider.groovy index ea6c504d76..172a3fe7ff 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/AzureRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/AzureRepositoryProvider.groovy @@ -214,4 +214,87 @@ final class AzureRepositoryProvider extends RepositoryProvider { return invokeBytes(url) } + /** {@inheritDoc} */ + @Override + List listDirectory(String path, int depth) { + // Build the Items API URL + def normalizedPath = normalizePath(path) + // For Azure API, root directory should be represented as "/" not empty string + if (!normalizedPath) { + normalizedPath = "/" + } + + def queryParams = [ + 'recursionLevel': depth > 1 ? 
'Full' : 'OneLevel', // Use Full for depth > 1 to get nested content + "api-version": 6.0, + '$format': 'json' + ] as Map + + // Only add scopePath if it's not the root directory + if (normalizedPath != "/") { + queryParams['scopePath'] = normalizedPath + } + + if (revision) { + queryParams['versionDescriptor.version'] = revision + if (COMMIT_REGEX.matcher(revision).matches()) { + queryParams['versionDescriptor.versionType'] = 'commit' + } + } + + def queryString = queryParams.collect({ "$it.key=$it.value"}).join('&') + def url = "$endpointUrl/items?$queryString" + + try { + Map response = invokeAndParseResponse(url) + List items = response?.value as List + + if (!items) { + return [] + } + + List entries = [] + + for (Map item : items) { + // Skip the root directory itself + String itemPath = item.get('path') as String + if (itemPath == path || (!path && itemPath == "/")) { + continue + } + + // Filter entries based on depth using base class helper + if (shouldIncludeAtDepth(itemPath, path, depth)) { + entries.add(createRepositoryEntry(item, path)) + } + } + + return entries.sort { it.name } + + } catch (Exception e) { + // Azure Items API may have different permissions or availability than other APIs + // Return empty list to allow graceful degradation + return [] + } + } + + private RepositoryEntry createRepositoryEntry(Map item, String basePath) { + String itemPath = item.get('path') as String + String name = itemPath?.split('/')?.last() ?: "unknown" + + // Determine type based on Azure's gitObjectType + String gitObjectType = item.get('gitObjectType') as String + EntryType type = (gitObjectType == 'tree') ? 
EntryType.DIRECTORY : EntryType.FILE + + String sha = item.get('objectId') as String + Long size = item.get('size') as Long + + return new RepositoryEntry( + name: name, + path: itemPath, + type: type, + sha: sha, + size: size + ) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketRepositoryProvider.groovy index 2f900e5d01..850ffac15d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketRepositoryProvider.groovy @@ -193,4 +193,65 @@ final class BitbucketRepositoryProvider extends RepositoryProvider { final url = getContentUrl(path) return invokeBytes(url) } + + /** {@inheritDoc} */ + @Override + List listDirectory(String path, int depth) { + final ref = revision ? getRefForRevision(revision) : getMainBranch() + // Normalize path using base class helper + final dirPath = normalizePath(path) + + // Build the src API URL - BitBucket's src endpoint returns directory listings when path is a directory + String url = "${config.endpoint}/2.0/repositories/$project/src/$ref/$dirPath" + + try { + // Make the API call + Map response = invokeAndParseResponse(url) + List values = response?.values as List + + if (!values) { + return [] + } + + List entries = [] + + for (Map entry : values) { + String entryPath = entry.get('path') as String + // Filter entries based on depth using base class helper + if (shouldIncludeAtDepth(entryPath, path, depth)) { + entries.add(createRepositoryEntry(entry, path)) + } + } + + return entries.sort { it.name } + + } catch (Exception e) { + // If API call fails, it might be because the path is not a directory + // or the API doesn't support directory listing + throw new UnsupportedOperationException("Directory listing not supported by BitBucket API for path: $path", e) + } + } + + private RepositoryEntry createRepositoryEntry(Map entry, 
String basePath) { + String entryPath = entry.get('path') as String + String name = entryPath?.split('/')?.last() ?: entry.get('name') as String + + // Determine type based on BitBucket's response + String type = entry.get('type') as String + EntryType entryType = (type == 'commit_directory') ? EntryType.DIRECTORY : EntryType.FILE + + String sha = entry.get('commit')?.get('hash') as String + Long size = entry.get('size') as Long + + // Ensure absolute path using base class helper + String fullPath = ensureAbsolutePath(entryPath) + + return new RepositoryEntry( + name: name, + path: fullPath, + type: entryType, + sha: sha, + size: size + ) + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketServerRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketServerRepositoryProvider.groovy index 0bc0220bd4..3455988ac3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketServerRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/BitbucketServerRepositoryProvider.groovy @@ -111,6 +111,12 @@ final class BitbucketServerRepositoryProvider extends RepositoryProvider { return invokeBytes(url) } + /** {@inheritDoc} */ + @Override + List listDirectory(String path, int depth) { + throw new UnsupportedOperationException("BitbucketServerRepositoryProvider does not support 'listDirectory' operation") + } + @Override List getTags() { final result = new ArrayList() diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/GiteaRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/GiteaRepositoryProvider.groovy index b47901cac0..4c93c1427a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/GiteaRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/GiteaRepositoryProvider.groovy @@ -19,11 +19,13 @@ package nextflow.scm import groovy.transform.CompileDynamic import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j /** * Implements a 
repository provider for Gitea service * * @author Akira Sekiguchi */ +@Slf4j @CompileStatic final class GiteaRepositoryProvider extends RepositoryProvider { @@ -113,4 +115,118 @@ final class GiteaRepositoryProvider extends RepositoryProvider { return invokeBytes(url) } + /** {@inheritDoc} */ + @Override + List listDirectory(String path, int depth) { + final branch = revision ?: "master" + // Normalize path using base class helper + final dirPath = normalizePath(path) + + // Build the contents API URL - Gitea follows GitHub-like API pattern + String url = "${config.endpoint}/repos/$project/contents" + if (dirPath) { + url += "/$dirPath" + } + url += "?ref=$branch" + + try { + // Make the API call + def response = invoke(url) + List contents = new groovy.json.JsonSlurper().parseText(response) as List + + if (!contents) { + return [] + } + + List entries = [] + + for (Map entry : contents) { + String entryPath = entry.get('path') as String + // Filter entries based on depth using base class helper + if (shouldIncludeAtDepth(entryPath, path, depth)) { + entries.add(createRepositoryEntry(entry)) + } + } + + // If depth > 1, we need to recursively get subdirectory contents + if (depth > 1) { + for (Map entry : contents) { + if (entry.get('type') == 'dir') { + String entryName = entry.get('name') as String + String subPath = dirPath ? 
"$dirPath/$entryName" : entryName + entries.addAll(getRecursiveEntries(subPath, depth, branch, 2)) + } + } + } + + return entries.sort { it.name } + + } catch (Exception e) { + throw new UnsupportedOperationException("Directory listing failed for Gitea path: $path", e) + } + } + + private List getRecursiveEntries(String basePath, int maxDepth, String branch, int currentDepth) { + if (currentDepth > maxDepth) { + return [] + } + + List allEntries = [] + + // Get current level entries first + final normalizedBasePath = normalizePath(basePath) + String url = "${config.endpoint}/repos/$project/contents" + if (normalizedBasePath) { + url += "/$normalizedBasePath" + } + url += "?ref=$branch" + + try { + def response = invoke(url) + List contents = new groovy.json.JsonSlurper().parseText(response) as List + + for (Map entry : contents) { + String entryPath = entry.get('path') as String + + // Add entries from the current level that match the depth criteria + if (shouldIncludeAtDepth(entryPath, basePath, maxDepth)) { + allEntries.add(createRepositoryEntry(entry)) + } + + // Recurse into subdirectories if we haven't reached max depth + if (entry.get('type') == 'dir' && currentDepth < maxDepth) { + String entryName = entry.get('name') as String + String subPath = normalizedBasePath ? "$normalizedBasePath/$entryName" : entryName + allEntries.addAll(getRecursiveEntries(subPath, maxDepth, branch, currentDepth + 1)) + } + } + } catch (Exception e) { + log.debug("Failed to process directory during recursive listing: ${e.message}") + // Continue processing other directories if one fails + } + + return allEntries + } + + private RepositoryEntry createRepositoryEntry(Map entry) { + String name = entry.get('name') as String + String path = entry.get('path') as String + String type = entry.get('type') as String + + EntryType entryType = (type == 'dir') ? 
EntryType.DIRECTORY : EntryType.FILE + String sha = entry.get('sha') as String + Long size = entry.get('size') as Long + + // Ensure absolute path using base class helper + String fullPath = ensureAbsolutePath(path) + + return new RepositoryEntry( + name: name, + path: fullPath, + type: entryType, + sha: sha, + size: size + ) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/GithubRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/GithubRepositoryProvider.groovy index f48bcd5341..d40eea2ba9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/GithubRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/GithubRepositoryProvider.groovy @@ -117,4 +117,140 @@ class GithubRepositoryProvider extends RepositoryProvider { response.get('content')?.toString()?.decodeBase64() } + /** {@inheritDoc} */ + @Override + @Memoized + List listDirectory(String path, int depth) { + // Get the tree SHA for the specific directory + String treeSha = getTreeSha(path) + + // Build the Trees API URL + String url = getTreeUrl(treeSha, depth > 1) + + // Make the API call and parse response + Map response = invokeAndParseResponse(url) + List treeEntries = response.get('tree') as List + + if (!treeEntries) { + return [] + } + + List entries = [] + + for (Map entry : treeEntries) { + String entryPath = entry.get('path') as String + + // Include if within depth limit: depth=0 includes immediate children only, + // depth=1 includes children+grandchildren, depth=2 includes children+grandchildren+great-grandchildren, etc. 
+ int entryDepth = entryPath.split("/").length - 1 + if (depth == -1 || entryDepth <= depth) { + entries.add(createRepositoryEntry(entry, path)) + } + } + + return entries.sort { it.name } + } + + private String getTreeUrl(String treeSha, boolean recursive) { + String url = "${config.endpoint}/repos/$project/git/trees/$treeSha" + if (recursive) { + url += "?recursive=1" + } + return url + } + + @Memoized + private String getTreeSha(String path) { + // Normalize path using base class helper + def normalizedPath = normalizePath(path) + + if (normalizedPath && !normalizedPath.isEmpty()) { + // For subdirectory, we need to find the tree SHA by traversing from root + return getTreeShaForPath(normalizedPath) + } + + // For root directory, get the commit SHA and then the tree SHA + String commitSha = getCommitSha() + Map commit = invokeAndParseResponse("${config.endpoint}/repos/$project/git/commits/$commitSha") + Map tree = commit.get('tree') as Map + return tree.get('sha') as String + } + + private String getTreeShaForPath(String path) { + // Start from root tree + String currentTreeSha = getTreeSha("") + String[] pathParts = path.split("/") + + for (String part : pathParts) { + String url = getTreeUrl(currentTreeSha, false) + Map response = invokeAndParseResponse(url) + List treeEntries = response.get('tree') as List + + Map foundEntry = treeEntries.find { + it.get('path') == part && it.get('type') == 'tree' + } + + if (!foundEntry) { + throw new IllegalArgumentException("Directory not found: $path") + } + + currentTreeSha = foundEntry.get('sha') as String + } + + return currentTreeSha + } + + @Memoized + private String getCommitSha() { + if (revision) { + // Try to resolve the revision to a commit SHA + try { + Map ref = invokeAndParseResponse("${config.endpoint}/repos/$project/git/refs/heads/$revision") + Map object = ref.get('object') as Map + return object.get('sha') as String + } catch (Exception e) { + // If it's not a branch, try as a tag or direct SHA + return 
revision + } + } + + // Default to main/master branch + try { + Map ref = invokeAndParseResponse("${config.endpoint}/repos/$project/git/refs/heads/main") + Map object = ref.get('object') as Map + return object.get('sha') as String + } catch (Exception e) { + Map ref = invokeAndParseResponse("${config.endpoint}/repos/$project/git/refs/heads/master") + Map object = ref.get('object') as Map + return object.get('sha') as String + } + } + + + private RepositoryEntry createRepositoryEntry(Map entry, String basePath) { + String entryPath = entry.get('path') as String + + // Create absolute path using base class helper + def normalizedBasePath = normalizePath(basePath) + String fullPath = normalizedBasePath && !normalizedBasePath.isEmpty() ? "/${normalizedBasePath}/${entryPath}" : ensureAbsolutePath(entryPath) + + // For name, use just the entry path (which is relative to the directory we're listing) + String name = entryPath + if (entryPath.contains("/")) { + name = entryPath.substring(entryPath.lastIndexOf("/") + 1) + } + + EntryType type = entry.get('type') == 'tree' ? 
EntryType.DIRECTORY : EntryType.FILE + String sha = entry.get('sha') as String + Long size = entry.get('size') as Long + + return new RepositoryEntry( + name: name, + path: fullPath, + type: type, + sha: sha, + size: size + ) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/GitlabRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/GitlabRepositoryProvider.groovy index 071ec9cd0d..7de273ebec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/GitlabRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/GitlabRepositoryProvider.groovy @@ -16,7 +16,7 @@ package nextflow.scm - +import groovy.json.JsonSlurper import groovy.util.logging.Slf4j /** * Implements a repository provider for GitHub service @@ -124,4 +124,68 @@ class GitlabRepositoryProvider extends RepositoryProvider { Map response = invokeAndParseResponse(url) response.get('content')?.toString()?.decodeBase64() } + + /** {@inheritDoc} */ + @Override + List listDirectory(String path, int depth) { + final ref = revision ?: getDefaultBranch() + final normalizedPath = normalizePath(path) + final encodedPath = normalizedPath ? URLEncoder.encode(normalizedPath, 'utf-8') : "" + + // Build the Tree API URL + String url = "${config.endpoint}/api/v4/projects/${getProjectName()}/repository/tree" + List params = [] + if (ref) params.add("ref=${ref}") + if (encodedPath) params.add("path=${encodedPath}") + + // For GitLab, we use recursive=true for any depth > 1 + if (depth > 1) { + params.add("recursive=true") + } + + if (params) { + url += "?" + params.join("&") + } + + // Make the API call and parse response + String response = invoke(url) + List treeEntries = response ? 
new JsonSlurper().parseText(response) as List : [] + + if (!treeEntries) { + return [] + } + + List entries = [] + + for (Map entry : treeEntries) { + String entryPath = entry.get('path') as String + + // Filter entries based on depth using base class helper + if (shouldIncludeAtDepth(entryPath, path, depth)) { + entries.add(createRepositoryEntry(entry, path)) + } + } + + return entries.sort { it.name } + } + + private RepositoryEntry createRepositoryEntry(Map entry, String basePath) { + String entryPath = entry.get('path') as String + String name = entry.get('name') as String + + EntryType type = entry.get('type') == 'tree' ? EntryType.DIRECTORY : EntryType.FILE + String sha = entry.get('id') as String + Long size = null // GitLab tree API doesn't provide file size + + // Ensure absolute path using base class helper + String fullPath = ensureAbsolutePath(entryPath) + + return new RepositoryEntry( + name: name, + path: fullPath, + type: type, + sha: sha, + size: size + ) + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/LocalRepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/LocalRepositoryProvider.groovy index 6c19b45aa2..4483c0f7cc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/LocalRepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/LocalRepositoryProvider.groovy @@ -103,6 +103,92 @@ class LocalRepositoryProvider extends RepositoryProvider { } } + @Override + List listDirectory(String path, int depth) { + final git = Git.open(new File(this.path, project)) + try { + final repo = git.getRepository() + def lastCommitId = repo.resolve(Constants.HEAD) + def revWalk = new RevWalk(repo) + def commit = revWalk.parseCommit(lastCommitId) + def tree = commit.getTree() + + def treeWalk = new TreeWalk(repo) + + // Normalize path using base class helper + def normalizedPath = normalizePath(path) + + if (normalizedPath && !normalizedPath.isEmpty()) { + // Navigate to the specific directory first + def 
dirWalk = TreeWalk.forPath(repo, normalizedPath, tree) + try { + if (!dirWalk || !dirWalk.isSubtree()) { + return [] // Path doesn't exist or is not a directory + } + treeWalk.addTree(dirWalk.getObjectId(0)) + } finally { + dirWalk?.close() + } + } else { + treeWalk.addTree(tree) + } + + // For depth filtering, we need to traverse recursively when depth > 1 + // The shouldIncludeAtDepth filter will handle the actual depth limiting + treeWalk.setRecursive(depth != 1) + + List entries = [] + + while (treeWalk.next()) { + String entryPath = treeWalk.getPathString() + + // Build full path for entries (relative paths need to be prefixed with base path) + String fullPath = normalizedPath && !normalizedPath.isEmpty() ? "/" + normalizedPath + "/" + entryPath : "/" + entryPath + + // Filter by depth using base class helper + if (shouldIncludeAtDepth(fullPath, path, depth)) { + entries.add(createRepositoryEntry(treeWalk, fullPath)) + } + } + + treeWalk.close() + revWalk.close() + + return entries.sort { it.name } + + } finally { + git.close() + } + } + + private RepositoryEntry createRepositoryEntry(TreeWalk treeWalk, String entryPath) { + String name = entryPath.split('/').last() + + // Determine if it's a directory or file based on file mode + EntryType type = treeWalk.isSubtree() ? 
EntryType.DIRECTORY : EntryType.FILE + String sha = treeWalk.getObjectId(0).name() + + // For files, try to get size + Long size = null + if (type == EntryType.FILE) { + try { + def objectId = treeWalk.getObjectId(0) + def loader = treeWalk.getObjectReader().open(objectId) + size = loader.getSize() + } catch (Exception e) { + // Size not available, leave as null + } + } + + return new RepositoryEntry( + name: name, + path: entryPath, + type: type, + sha: sha, + size: size + ) + } + @Override List getTags() { final String prefix = 'refs/tags/' diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy index 42df621c2f..15dfd6e78b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy @@ -66,6 +66,19 @@ abstract class RepositoryProvider { String commitId } + enum EntryType { + FILE, DIRECTORY + } + + @Canonical + static class RepositoryEntry { + String name + String path + EntryType type + String sha + Long size + } + /** * The client used to carry out http requests */ @@ -376,6 +389,38 @@ abstract class RepositoryProvider { */ abstract byte[] readBytes( String path ) + /** + * List directory contents in the remote repository with depth control + * + * @param path The relative path of the directory to list (empty string or null for root) + * @param depth The maximum depth of traversal: + * - depth = 1: immediate children only + * - depth = 2: children + grandchildren + * - depth = 3: children + grandchildren + great-grandchildren + * - larger values: traverse deeper accordingly + * + * Example: Given repository structure: + *
+     * /
+     * ├── file-a.txt
+     * ├── file-b.txt
+     * ├── dir-a/
+     * │   ├── file-c.txt
+     * │   └── subdir/
+     * │       └── file-d.txt
+     * └── dir-b/
+     *     └── file-e.txt
+     * 
+ * + * Results for listDirectory("/", depth): + * - depth = 1: [file-a.txt, file-b.txt, dir-a/, dir-b/] + * - depth = 2: [file-a.txt, file-b.txt, dir-a/, dir-b/, file-c.txt, subdir/, file-e.txt] + * - depth = 3: [file-a.txt, file-b.txt, dir-a/, dir-b/, file-c.txt, subdir/, file-e.txt, file-d.txt] + * + * @return A list of repository entries (files and directories) excluding the root directory itself + */ + abstract List listDirectory( String path, int depth ) + String readText( String path ) { def bytes = readBytes(path) return bytes ? new String(bytes) : null @@ -472,4 +517,76 @@ abstract class RepositoryProvider { return builder.build() } + /** + * Normalizes a path for repository operations by treating "/" as an empty path (root directory). + * This helper method ensures consistent path handling across all repository providers. + * + * @param path The input path to normalize + * @return Normalized path: null/empty/"/" becomes "", otherwise removes leading slash + */ + protected static String normalizePath(String path) { + if (path == "/" || path == null || path.isEmpty()) { + return "" + } + return path.startsWith("/") ? path.substring(1) : path + } + + /** + * Ensures a path starts with "/" to create an absolute path for consistent API responses. + * This helper method is used when creating RepositoryEntry objects. + * + * @param path The input path + * @return Absolute path starting with "/" + */ + protected static String ensureAbsolutePath(String path) { + if (path == null || path.isEmpty()) { + return "/" + } + return path.startsWith("/") ? path : "/" + path + } + + /** + * Checks if an entry should be included based on depth filtering. + * This helper provides consistent depth semantics across providers. NOTE(review): depth is zero-based here (0 = immediate children) while listDirectory documents depth = 1 as immediate children - confirm callers should pass depth unchanged.
+ * + * @param entryPath The full path of the entry + * @param basePath The base directory path being listed + * @param depth The maximum depth (-1 for unlimited, 0 for immediate children only) + * @return true if the entry should be included + */ + protected static boolean shouldIncludeAtDepth(String entryPath, String basePath, int depth) { + if (depth == -1) { + return true // Unlimited depth + } + + String relativePath = entryPath + String normalizedBasePath = normalizePath(basePath) + + if (normalizedBasePath && !normalizedBasePath.isEmpty()) { + String normalizedEntry = entryPath.stripStart('/').stripEnd('/') + normalizedBasePath = normalizedBasePath.stripEnd('/') + + if (normalizedEntry.startsWith(normalizedBasePath + "/")) { + relativePath = normalizedEntry.substring(normalizedBasePath.length() + 1) + } else if (normalizedEntry == normalizedBasePath) { + return false // Skip the base directory itself + } else { + return false // Entry is not under the base path + } + } else { + // For root directory, use the entry path directly + relativePath = entryPath.stripStart('/').stripEnd('/') + } + + if (relativePath.isEmpty()) { + return false + } + + // Count directory levels in the relative path + int entryDepth = relativePath.split("/").length - 1 + + // Include if within depth limit: depth=0 includes immediate children only + return entryDepth <= depth + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/AzureRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/AzureRepositoryProviderTest.groovy index 2ee6c7bf7e..de52cd4801 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/AzureRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/AzureRepositoryProviderTest.groovy @@ -243,4 +243,68 @@ class AzureRepositoryProviderTest extends Specification { then: result=='hello\n' } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_AZURE_REPOS_TOKEN')}) + def 'should list root 
directory contents'() { + given: + def token = System.getenv('NXF_AZURE_REPOS_TOKEN') + def config = new ProviderConfig('azurerepos').setAuth(token) + def repo = new AzureRepositoryProvider('pditommaso/nf-azure-repo', config) + + when: + def entries = repo.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'docs' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + // Should NOT include nested files for depth=1 + !entries.any { it.path == '/docs/images/nf-core-rnaseq_logo_light.png' } + and: + entries.every { it.path && it.sha } + } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_AZURE_REPOS_TOKEN')}) + def 'should list docs directory contents'() { + given: + def token = System.getenv('NXF_AZURE_REPOS_TOKEN') + def config = new ProviderConfig('azurerepos').setAuth(token) + def repo = new AzureRepositoryProvider('pditommaso/nf-azure-repo', config) + + when: + def entries = repo.listDirectory("/docs", 1) + + then: + entries.size() > 0 + entries.every { it.path.startsWith('/docs/') } + entries.any { it.name == 'images' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + // Should NOT include nested files for depth=1 + !entries.any { it.path == '/docs/images/nf-core-rnaseq_logo_light.png' } + and: + entries.every { it.path && it.sha } + } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_AZURE_REPOS_TOKEN')}) + def 'should list subdirectory contents'() { + given: + def token = System.getenv('NXF_AZURE_REPOS_TOKEN') + def config = new ProviderConfig('azurerepos').setAuth(token) + def repo = new AzureRepositoryProvider('pditommaso/nf-azure-repo', config) + + when: + def entries = repo.listDirectory("/docs", 2) + + then: + entries.size() > 0 + entries.every { it.path.startsWith('/docs/') } + // Should include both the subdirectory and files within it up to depth 2 + 
entries.any { it.name == 'images' && it.type == RepositoryProvider.EntryType.DIRECTORY } + entries.any { it.name == 'nf-core-rnaseq_logo_light.png' && it.path == '/docs/images/nf-core-rnaseq_logo_light.png' && it.type == RepositoryProvider.EntryType.FILE } + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketRepositoryProviderTest.groovy index 9765ea2c44..d1789f5f3e 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketRepositoryProviderTest.groovy @@ -205,4 +205,69 @@ class BitbucketRepositoryProviderTest extends Specification { ["Authorization", "Basic ${"foo:bar".bytes.encodeBase64()}"] | new ProviderConfig('bitbucket').setUser('foo').setPassword('bar') ["Authorization", "Basic ${"foo@nextflow.io:xyz".bytes.encodeBase64()}"] | new ProviderConfig('bitbucket').setUser('foo@nextflow.io').setToken('xyz') } + + @Requires({ System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') }) + def 'should list root directory contents'() { + given: + def token = System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') + def config = new ProviderConfig('bitbucket').setAuth(token) + def repo = new BitbucketRepositoryProvider('pditommaso/tutorial', config) + + when: + def entries = repo.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + and: + entries.every { it.path && it.name && it.sha } + // Should only include immediate children for depth=1 + entries.every { it.path.split('/').length <= 2 } + and: + // Should NOT include any nested paths beyond immediate children + !entries.any { it.path.split('/').length > 2 } + } + + @Requires({ System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') }) + def 'should list directory contents recursively'() { + given: + def token = System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') 
+ def config = new ProviderConfig('bitbucket').setAuth(token) + def repo = new BitbucketRepositoryProvider('pditommaso/tutorial', config) + + when: + def entries = repo.listDirectory("/", 10) + + then: + entries.size() > 0 + and: + // Should include files from root and potentially subdirectories + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + and: + entries.every { it.path && it.name && it.sha } + } + + @Requires({ System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') }) + def 'should list directory contents with depth 2'() { + given: + def token = System.getenv('NXF_BITBUCKET_ACCESS_TOKEN') + def config = new ProviderConfig('bitbucket').setAuth(token) + def repo = new BitbucketRepositoryProvider('pditommaso/tutorial', config) + + when: + def depthOne = repo.listDirectory("/", 1) + def depthTwo = repo.listDirectory("/", 2) + + then: + depthOne.size() > 0 + depthTwo.size() >= depthOne.size() + and: + // Should include immediate children (depth 1) + depthOne.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + depthTwo.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + and: + depthOne.every { it.path && it.name && it.sha } + depthTwo.every { it.path && it.name && it.sha } + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketServerRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketServerRepositoryProviderTest.groovy index f66df1253e..5ac2ec3fff 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketServerRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/BitbucketServerRepositoryProviderTest.groovy @@ -126,4 +126,19 @@ class BitbucketServerRepositoryProviderTest extends Specification { then: result.contains( new RepositoryProvider.TagInfo('v1.0', 'c62df3d9c2464adcaa0fb6c978c8e32e2672b191') ) } + + @Requires( { System.getenv('NXF_BITBUCKET_SERVER_ACCESS_TOKEN') } ) + def 'should list 
root directory contents'() { + given: + def token = System.getenv('NXF_BITBUCKET_SERVER_ACCESS_TOKEN') + def config = new ProviderConfig('bbs', [server:'http://slurm.seqera.io:7990', platform:'bitbucketsever']).setAuth(token) + def repo = new BitbucketServerRepositoryProvider('scm/hello/hello', config) + + when: + repo.listDirectory("/", 1) + + then: + thrown UnsupportedOperationException + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/GiteaRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/GiteaRepositoryProviderTest.groovy index 29f05e1e8d..cce9716e22 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/GiteaRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/GiteaRepositoryProviderTest.groovy @@ -126,4 +126,93 @@ class GiteaRepositoryProviderTest extends Specification { result.length == 22915 result.sha256() == '7a396344498750f614155f6e4f38b7d6ca98ced45daf0921b64acf73b18efaf4' } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_GITEA_ACCESS_TOKEN')}) + def 'should list root directory contents'() { + given: + def token = System.getenv('NXF_GITEA_ACCESS_TOKEN') + def config = new ProviderConfig('gitea').setAuth(token) + def repo = new GiteaRepositoryProvider('pditommaso/test-hello', config) + + when: + def entries = repo.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'README.md' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + // Should NOT include nested files for depth=1 + !entries.any { it.path == '/test/test-asset.bin' } + and: + entries.every { it.path && it.sha } + } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_GITEA_ACCESS_TOKEN')}) + def 'should list subdirectory contents'() { + given: + def token = System.getenv('NXF_GITEA_ACCESS_TOKEN') + def config = new 
ProviderConfig('gitea').setAuth(token) + def repo = new GiteaRepositoryProvider('pditommaso/test-hello', config) + + when: + def entries = repo.listDirectory("/test", 1) + + then: + entries.size() > 0 + entries.any { it.name == 'test-asset.bin' && it.type == RepositoryProvider.EntryType.FILE } + entries.every { it.path.startsWith('/test/') } + and: + entries.every { it.path && it.name && it.sha } + } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_GITEA_ACCESS_TOKEN')}) + def 'should list directory contents recursively'() { + given: + def token = System.getenv('NXF_GITEA_ACCESS_TOKEN') + def config = new ProviderConfig('gitea').setAuth(token) + def repo = new GiteaRepositoryProvider('pditommaso/test-hello', config) + + when: + def entries = repo.listDirectory("/", 10) + + then: + entries.size() > 0 + and: + // Should include files from root and potentially subdirectories + entries.any { it.name == 'README.md' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test-asset.bin' && it.path.contains('/test/') } + and: + entries.every { it.path && it.name && it.sha } + } + + @IgnoreIf({System.getenv('NXF_SMOKE')}) + @Requires({System.getenv('NXF_GITEA_ACCESS_TOKEN')}) + def 'should list directory contents with depth 2'() { + given: + def token = System.getenv('NXF_GITEA_ACCESS_TOKEN') + def config = new ProviderConfig('gitea').setAuth(token) + def repo = new GiteaRepositoryProvider('pditommaso/test-hello', config) + + when: + def depthOne = repo.listDirectory("/", 1) + def depthTwo = repo.listDirectory("/", 2) + + then: + depthOne.size() > 0 + depthTwo.size() >= depthOne.size() + and: + // Should include immediate children (depth 1) + depthOne.any { it.name == 'README.md' && it.type == RepositoryProvider.EntryType.FILE } + depthTwo.any { it.name == 'README.md' && it.type == RepositoryProvider.EntryType.FILE } + // Should include nested files (depth 2) + depthTwo.any { it.name == 'test-asset.bin' && 
it.path.contains('/test/') } + and: + depthOne.every { it.path && it.name && it.sha } + depthTwo.every { it.path && it.name && it.sha } + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/GithubRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/GithubRepositoryProviderTest.groovy index b642222efb..dbb92ed14c 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/GithubRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/GithubRepositoryProviderTest.groovy @@ -167,5 +167,106 @@ class GithubRepositoryProviderTest extends Specification { then: result.trim().startsWith(/println "I'm the main"/) } + + @Requires({System.getenv('NXF_GITHUB_ACCESS_TOKEN')}) + def 'should list root directory contents'() { + given: + def token = System.getenv('NXF_GITHUB_ACCESS_TOKEN') + def config = new ProviderConfig('github').setAuth(token) + def repo = new GithubRepositoryProvider('nextflow-io/test-hello', config) + + when: + def entries = repo.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'main.nf' && it.path == '/main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test' && it.path == '/test' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + !entries.any { it.path == '/test/test-asset.bin' } + and: + entries.every { it.path && it.sha } + } + + @Requires({System.getenv('NXF_GITHUB_ACCESS_TOKEN')}) + def 'should list subdirectory contents'() { + given: + def token = System.getenv('NXF_GITHUB_ACCESS_TOKEN') + def config = new ProviderConfig('github').setAuth(token) + def repo = new GithubRepositoryProvider('nextflow-io/test-hello', config) + + when: + def entries = repo.listDirectory("/test", 1) + + then: + entries.size() > 0 + entries.any { it.name == 'test-asset.bin' && it.type == RepositoryProvider.EntryType.FILE } + entries.every { it.path.startsWith('/test/') } + } + + @Requires({System.getenv('NXF_GITHUB_ACCESS_TOKEN')}) + 
def 'should list directory contents recursively'() { + given: + def token = System.getenv('NXF_GITHUB_ACCESS_TOKEN') + def config = new ProviderConfig('github').setAuth(token) + def repo = new GithubRepositoryProvider('nextflow-io/test-hello', config) + + when: + def entries = repo.listDirectory("/", 10) + + then: + entries.size() > 0 + and: + // Should include files from root and subdirectories + entries.any { it.path == '/main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.path == '/test/test-asset.bin' && it.type == RepositoryProvider.EntryType.FILE } + and: + entries.every { it.path && it.sha } + } + + @Requires({System.getenv('NXF_GITHUB_ACCESS_TOKEN')}) + def 'should list directory contents with limited depth'() { + given: + def token = System.getenv('NXF_GITHUB_ACCESS_TOKEN') + def config = new ProviderConfig('github').setAuth(token) + def repo = new GithubRepositoryProvider('nextflow-io/test-hello', config) + + when: + def depthOne = repo.listDirectory("/", 1) + def depthTwo = repo.listDirectory("/", 2) + + then: + depthOne.size() > 0 + depthTwo.size() >= depthOne.size() + // Depth 1 should only include immediate children (no nested paths beyond root) + depthOne.every { it.path.split('/').length <= 2 } + } + + @Requires({System.getenv('NXF_GITHUB_ACCESS_TOKEN')}) + def 'should list directory contents with depth 2'() { + given: + def token = System.getenv('NXF_GITHUB_ACCESS_TOKEN') + def config = new ProviderConfig('github').setAuth(token) + def repo = new GithubRepositoryProvider('nextflow-io/test-hello', config) + + when: + def entries = repo.listDirectory("/", 2) + + then: + entries.size() > 0 + // Should include immediate children (depth 1) + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test' && it.type == RepositoryProvider.EntryType.DIRECTORY } + // Should include nested files (depth 2) + entries.any { it.name == 'test-asset.bin' && it.path.contains('/test/') 
} + entries.every { it.path && it.sha } + } + + def 'should return empty list for directory with no entries'() { + expect: + // This test will be integration test based - relying on actual API + true + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/GitlabRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/GitlabRepositoryProviderTest.groovy index 59f7b848de..7f4bd1fb86 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/GitlabRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/GitlabRepositoryProviderTest.groovy @@ -147,6 +147,80 @@ class GitlabRepositoryProviderTest extends Specification { new GitlabRepositoryProvider('pditommaso/hello', obj) .getContentUrl('//conf/extra.conf') == 'https://gitlab.com/api/v4/projects/pditommaso%2Fhello/repository/files/conf%2Fextra.conf?ref=master' + } + + @Requires({System.getenv('NXF_GITLAB_ACCESS_TOKEN')}) + def 'should list root directory contents'() { + given: + def token = System.getenv('NXF_GITLAB_ACCESS_TOKEN') + def config = new ProviderConfig('gitlab').setAuth(token) + def repo = new GitlabRepositoryProvider('pditommaso/hello', config) + + when: + def entries = repo.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + // Should NOT include nested files for depth=1 + !entries.any { it.path == '/test/test-asset.bin' } + and: + entries.every { it.path && it.sha } + } + + @Requires({System.getenv('NXF_GITLAB_ACCESS_TOKEN')}) + def 'should list subdirectory contents'() { + given: + def token = System.getenv('NXF_GITLAB_ACCESS_TOKEN') + def config = new ProviderConfig('gitlab').setAuth(token) + def repo = new GitlabRepositoryProvider('pditommaso/hello', config) + + when: + def entries = repo.listDirectory("/test", 1) + then: + entries.size() > 0 + entries.any { 
it.name == 'test-asset.bin' && it.path=='/test/test-asset.bin' && it.type == RepositoryProvider.EntryType.FILE } + entries.every { it.path.startsWith('/test/') } + } + + @Requires({System.getenv('NXF_GITLAB_ACCESS_TOKEN')}) + def 'should list directory contents recursively'() { + given: + def token = System.getenv('NXF_GITLAB_ACCESS_TOKEN') + def config = new ProviderConfig('gitlab').setAuth(token) + def repo = new GitlabRepositoryProvider('pditommaso/hello', config) + + when: + def entries = repo.listDirectory("/", 10) + + then: + entries.size() > 0 + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test-asset.bin' && it.type == RepositoryProvider.EntryType.FILE } + entries.every { it.path && it.sha } + } + + @Requires({System.getenv('NXF_GITLAB_ACCESS_TOKEN')}) + def 'should list directory contents with depth 2'() { + given: + def token = System.getenv('NXF_GITLAB_ACCESS_TOKEN') + def config = new ProviderConfig('gitlab').setAuth(token) + def repo = new GitlabRepositoryProvider('pditommaso/hello', config) + + when: + def entries = repo.listDirectory("/", 2) + + then: + entries.size() > 0 + // Should include immediate children (depth 1) + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test' && it.type == RepositoryProvider.EntryType.DIRECTORY } + // Should include nested files (depth 2) + entries.any { it.name == 'test-asset.bin' && it.path.contains('/test/') } + entries.every { it.path && it.sha } } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/LocalRepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/LocalRepositoryProviderTest.groovy index 0ca0d2a737..e07d230a2c 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/LocalRepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/LocalRepositoryProviderTest.groovy @@ -185,4 +185,114 @@ class LocalRepositoryProviderTest 
extends Specification { and: branches.find { it.name == 'branch_2' }.commitId == ref2.getObjectId().name() } + + def 'should list root directory contents'() { + given: + def dir = testFolder.resolve('project_hello').toFile() + new File(dir, 'test.txt').text = 'test content' + new File(dir, 'subdir').mkdirs() + new File(dir, 'subdir/nested.txt').text = 'nested content' + repo.add().addFilepattern('.').call() + repo.commit().setSign(false).setMessage('Add test files').call() + + def config = new ProviderConfig('local', [path: testFolder]) + def manager = new LocalRepositoryProvider('project_hello', config) + + when: + def entries = manager.listDirectory("/", 1) + + then: + entries.size() > 0 + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'test.txt' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'subdir' && it.type == RepositoryProvider.EntryType.DIRECTORY } + and: + // Should NOT include anything nested under 'subdir' for depth=1 (the fixture above only creates subdir/nested.txt) + !entries.any { it.path=='/subdir/nested.txt' } + !entries.any { it.path.startsWith('/subdir/') } + and: + entries.every { it.path && it.sha } + } + + def 'should list subdirectory contents'() { + given: + def dir = testFolder.resolve('project_hello').toFile() + new File(dir, 'subdir').mkdirs() + new File(dir, 'subdir/file1.txt').text = 'file1 content' + new File(dir, 'subdir/file2.txt').text = 'file2 content' + repo.add().addFilepattern('.').call() + repo.commit().setSign(false).setMessage('Add subdirectory files').call() + + def config = new ProviderConfig('local', [path: testFolder]) + def manager = new LocalRepositoryProvider('project_hello', config) + + when: + def entries = manager.listDirectory("/subdir", 1) + + then: + entries.size() == 2 + entries.any { it.name == 'file1.txt' && it.path=='/subdir/file1.txt' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'file2.txt' && it.path=='/subdir/file2.txt' && it.type ==
RepositoryProvider.EntryType.FILE } + entries.every { it.path.startsWith('/subdir/') } + and: + entries.every { it.path && it.name && it.sha } + } + + def 'should list directory contents recursively'() { + given: + def dir = testFolder.resolve('project_hello').toFile() + new File(dir, 'deep').mkdirs() + new File(dir, 'deep/subdir').mkdirs() + new File(dir, 'deep/file1.txt').text = 'deep file content' + new File(dir, 'deep/subdir/file2.txt').text = 'deeply nested content' + repo.add().addFilepattern('.').call() + repo.commit().setSign(false).setMessage('Add deep directory structure').call() + + def config = new ProviderConfig('local', [path: testFolder]) + def manager = new LocalRepositoryProvider('project_hello', config) + + when: + def entries = manager.listDirectory("/", 10) + + then: + entries.size() > 0 + and: + // Should include files from root and subdirectories + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + entries.any { it.name == 'file1.txt' && it.path.contains('/deep/') } + entries.any { it.name == 'file2.txt' && it.path.contains('/deep/subdir/') } + and: + entries.every { it.path && it.name && it.sha } + } + + def 'should list directory contents with depth 2'() { + given: + def dir = testFolder.resolve('project_hello').toFile() + new File(dir, 'level1').mkdirs() + new File(dir, 'level1/level2').mkdirs() + new File(dir, 'level1/file-l1.txt').text = 'level 1 content' + new File(dir, 'level1/level2/file-l2.txt').text = 'level 2 content' + repo.add().addFilepattern('.').call() + repo.commit().setSign(false).setMessage('Add multi-level directory structure').call() + + def config = new ProviderConfig('local', [path: testFolder]) + def manager = new LocalRepositoryProvider('project_hello', config) + + when: + def depthOne = manager.listDirectory("/", 1) + def depthTwo = manager.listDirectory("/", 2) + + then: + depthOne.size() > 0 + depthTwo.size() >= depthOne.size() + and: + // Should include immediate children (depth 
1) + depthOne.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + depthTwo.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + // Should include nested files (depth 2) + depthTwo.any { it.name == 'file-l1.txt' && it.path.contains('/level1/') } + and: + depthOne.every { it.path && it.name && it.sha } + depthTwo.every { it.path && it.name && it.sha } + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/RepositoryProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/RepositoryProviderTest.groovy index 4c7f7a2651..69b0c0ef94 100644 --- a/modules/nextflow/src/test/groovy/nextflow/scm/RepositoryProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/scm/RepositoryProviderTest.groovy @@ -26,6 +26,7 @@ import nextflow.SysEnv import nextflow.exception.HttpResponseLengthExceedException import nextflow.util.RetryConfig import spock.lang.Specification +import spock.lang.Unroll /** * * @author Paolo Di Tommaso @@ -213,4 +214,120 @@ class RepositoryProviderTest extends Specification { } } + // ====== Path normalization helper method tests ====== + + @Unroll + def 'normalizePath should handle #description'() { + expect: + RepositoryProvider.normalizePath(INPUT) == EXPECTED + + where: + INPUT | EXPECTED | description + null | "" | "null input" + "" | "" | "empty string" + "/" | "" | "root directory slash" + "/docs" | "docs" | "absolute path" + "docs" | "docs" | "relative path" + "/docs/guide" | "docs/guide" | "nested absolute path" + "docs/guide" | "docs/guide" | "nested relative path" + "//" | "/" | "double slash" + "///docs" | "//docs" | "multiple leading slashes" + } + + @Unroll + def 'ensureAbsolutePath should handle #description'() { + expect: + RepositoryProvider.ensureAbsolutePath(INPUT) == EXPECTED + + where: + INPUT | EXPECTED | description + null | "/" | "null input" + "" | "/" | "empty string" + "/" | "/" | "root directory" + "/docs" | "/docs" | "already absolute path" + "docs" | 
"/docs" | "relative path" + "/docs/guide" | "/docs/guide" | "nested absolute path" + "docs/guide" | "/docs/guide" | "nested relative path" + "main.nf" | "/main.nf" | "simple filename" + } + + @Unroll + def 'shouldIncludeAtDepth should handle depth=#depth basePath=#basePath entryPath=#entryPath'() { + expect: + RepositoryProvider.shouldIncludeAtDepth(entryPath, basePath, depth) == expected + + where: + entryPath | basePath | depth | expected | description + // Root directory tests (basePath = null, "", or "/") + "main.nf" | null | 0 | true | "immediate child in root with depth 0" + "docs/guide.md" | null | 0 | false | "nested file in root with depth 0" + "docs/guide.md" | null | 1 | true | "nested file in root with depth 1" + "docs/sub/file.md" | null | 1 | false | "deeply nested file with depth 1" + "docs/sub/file.md" | null | 2 | true | "deeply nested file with depth 2" + "main.nf" | "" | 0 | true | "immediate child with empty basePath" + "main.nf" | "/" | 0 | true | "immediate child with root basePath" + + // Subdirectory tests + "docs/guide.md" | "docs" | 0 | true | "immediate child in subdirectory" + "docs/sub/file.md" | "docs" | 0 | false | "nested file in subdirectory with depth 0" + "docs/sub/file.md" | "docs" | 1 | true | "nested file in subdirectory with depth 1" + "docs/guide.md" | "/docs" | 0 | true | "immediate child with absolute basePath" + + // Edge cases + "docs" | "docs" | 0 | false | "base directory itself should be excluded" + "other/file.md" | "docs" | 0 | false | "file outside basePath should be excluded" + "main.nf" | null | -1 | true | "unlimited depth should include everything" + "docs/sub/deep.md" | null | -1 | true | "unlimited depth with nested file" + "" | null | 0 | false | "empty entryPath should be excluded" + + // Complex path tests + "docs/api/index.md" | "docs" | 1 | true | "api subdirectory file with depth 1" + "docs/api/ref.md" | "docs/api" | 0 | true | "immediate child of nested basePath" + "docs/api/v1/spec.md" | "docs" | 2 | 
true | "deeply nested with sufficient depth" + "docs/api/v1/spec.md" | "docs" | 1 | false | "deeply nested without sufficient depth" + } + + def 'shouldIncludeAtDepth should handle realistic directory structure'() { + given: + def entries = [ + "/main.nf", + "/nextflow.config", + "/README.md", + "/docs/guide.md", + "/docs/api/index.md", + "/docs/api/reference.md", + "/src/process.nf", + "/src/utils/helper.nf", + "/test/test-data.csv" + ] + + when: "listing root with depth 0" + def rootDepth0 = entries.findAll { RepositoryProvider.shouldIncludeAtDepth(it, "/", 0) } + + then: + rootDepth0.size() == 3 + rootDepth0.containsAll(["/main.nf", "/nextflow.config", "/README.md"]) + + when: "listing root with depth 1" + def rootDepth1 = entries.findAll { RepositoryProvider.shouldIncludeAtDepth(it, "/", 1) } + + then: + rootDepth1.size() == 6 + rootDepth1.containsAll(["/main.nf", "/nextflow.config", "/README.md", "/docs/guide.md", "/src/process.nf", "/test/test-data.csv"]) + + when: "listing docs with depth 0" + def docsDepth0 = entries.findAll { RepositoryProvider.shouldIncludeAtDepth(it, "/docs", 0) } + + then: + docsDepth0.size() == 1 + docsDepth0.contains("/docs/guide.md") + + when: "listing docs with depth 1" + def docsDepth1 = entries.findAll { RepositoryProvider.shouldIncludeAtDepth(it, "/docs", 1) } + + then: + docsDepth1.size() == 3 + docsDepth1.containsAll(["/docs/guide.md", "/docs/api/index.md", "/docs/api/reference.md"]) + } + } diff --git a/plugins/nf-codecommit/src/main/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProvider.groovy b/plugins/nf-codecommit/src/main/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProvider.groovy index 36bf512da6..91fe21f52f 100644 --- a/plugins/nf-codecommit/src/main/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProvider.groovy +++ b/plugins/nf-codecommit/src/main/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProvider.groovy @@ -24,6 +24,7 @@ import software.amazon.awssdk.regions.Region import 
software.amazon.awssdk.services.codecommit.CodeCommitClient import software.amazon.awssdk.services.codecommit.model.CodeCommitException import software.amazon.awssdk.services.codecommit.model.GetFileRequest +import software.amazon.awssdk.services.codecommit.model.GetFolderRequest import software.amazon.awssdk.services.codecommit.model.GetRepositoryRequest import software.amazon.awssdk.services.codecommit.model.RepositoryMetadata import groovy.transform.CompileStatic @@ -33,6 +34,7 @@ import nextflow.exception.AbortOperationException import nextflow.exception.MissingCredentialsException import nextflow.scm.ProviderConfig import nextflow.scm.RepositoryProvider +import nextflow.scm.RepositoryProvider.RepositoryEntry import nextflow.util.StringUtils import org.eclipse.jgit.api.errors.TransportException import org.eclipse.jgit.transport.CredentialsProvider @@ -154,6 +156,63 @@ class AwsCodeCommitRepositoryProvider extends RepositoryProvider { } } + /** {@inheritDoc} Uses the CodeCommit GetFolder API; depth 0 or 1 lists immediate children only, -1 is unlimited. **/ + @Override + List<RepositoryEntry> listDirectory(String path, int depth) { + try { + // GetFolder returns a single folder level per call, so deeper levels are + // fetched below with one extra GetFolder call per sub-folder + def request = GetFolderRequest.builder() + .repositoryName(repositoryName) + .folderPath(path ?: "/") + .commitSpecifier(revision ?: "HEAD") + .build() + + def response = client.getFolder(request) + + List<RepositoryEntry> entries = [] + + // Add files - relativePath is relative to the queried folder, so the entry path must come from absolutePath + response.files()?.each { file -> + entries.add(new RepositoryEntry( + name: file.relativePath().split('/').last(), + path: ensureAbsolutePath(file.absolutePath()), + type: RepositoryProvider.EntryType.FILE, + sha: file.blobId(), + size: null // AWS CodeCommit API doesn't provide file size in folder response + )) + } + + // Add subdirectories - each one needs its own GetFolder call to be traversed further + response.subFolders()?.each { folder -> + entries.add(new RepositoryEntry( + name: folder.relativePath().split('/').last(), + path:
ensureAbsolutePath(folder.absolutePath()), + type: RepositoryProvider.EntryType.DIRECTORY, + sha: folder.treeId(), // tree SHA-1 that GetFolder reports for the sub-folder + size: null + )) + + // depth 0 and 1 both mean immediate children only; -1 means unlimited depth + // Any other value costs one extra GetFolder call per sub-folder, which can be slow for large repositories + if (depth != 0 && depth != 1) { + try { + def subEntries = listDirectory(folder.absolutePath(), depth == -1 ? -1 : depth - 1) + entries.addAll(subEntries) + } catch (Exception e) { + // Best effort: skip a sub-folder that cannot be listed and continue with its siblings + } + } + } + + return entries.sort { it.name } + + } catch (Exception e) { + checkMissingCredsException(e) + throw new UnsupportedOperationException("Directory listing failed for AWS CodeCommit path: $path - ${e.message}", e) + } + } + protected void checkMissingCredsException(Exception e) { final errs = [ "Failed to connect to service endpoint", diff --git a/plugins/nf-codecommit/src/test/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProviderTest.groovy b/plugins/nf-codecommit/src/test/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProviderTest.groovy index 7c5ecb7dd4..62bb6d7ac4 100644 --- a/plugins/nf-codecommit/src/test/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProviderTest.groovy +++ b/plugins/nf-codecommit/src/test/nextflow/cloud/aws/codecommit/AwsCodeCommitRepositoryProviderTest.groovy @@ -106,4 +106,56 @@ class AwsCodeCommitRepositoryProviderTest extends Specification { } + def 'should list root directory contents'() { + given: + def config = new AwsCodeCommitProviderConfig('git-codecommit.eu-west-1.amazonaws.com') + def provider = new AwsCodeCommitRepositoryProvider('codecommit-eu-west-1/my-repo', config) + + when: + def entries = provider.listDirectory("/", 1) + + then: + entries.size() > 0 + and: + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + and: + entries.every { it.path && it.name && it.sha } + // Should only include immediate children for depth=1 +
entries.every { it.path.split('/').length <= 2 } + } + + def 'should list directory contents recursively'() { + given: + def config = new AwsCodeCommitProviderConfig('git-codecommit.eu-west-1.amazonaws.com') + def provider = new AwsCodeCommitRepositoryProvider('codecommit-eu-west-1/my-repo', config) + + when: + def entries = provider.listDirectory("/", 10) + + then: + entries.size() > 0 + and: + // Should include files from root and potentially subdirectories + entries.any { it.name == 'main.nf' && it.type == RepositoryProvider.EntryType.FILE } + and: + entries.every { it.path && it.name && it.sha } + } + + def 'should list directory contents with depth 2'() { + given: + def config = new AwsCodeCommitProviderConfig('git-codecommit.eu-west-1.amazonaws.com') + def provider = new AwsCodeCommitRepositoryProvider('codecommit-eu-west-1/my-repo', config) + + when: + def depthOne = provider.listDirectory("/", 1) + def depthTwo = provider.listDirectory("/", 2) + + then: + depthOne.size() > 0 + depthTwo.size() >= depthOne.size() + and: + depthOne.every { it.path && it.name && it.sha } + depthTwo.every { it.path && it.name && it.sha } + } + }