From e5685bb3ce7fd842d194b7b37e736581b9d8cf9c Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 2 Jun 2025 13:46:15 +0200 Subject: [PATCH 1/5] tests: add test with mocked servers --- backend/doi/api/inveniotypes.go | 1 - backend/doi/doi.go | 21 ++- backend/doi/doi_internal_test.go | 215 +++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 4 deletions(-) diff --git a/backend/doi/api/inveniotypes.go b/backend/doi/api/inveniotypes.go index 75d5ad2407d9f..bdb8664511942 100644 --- a/backend/doi/api/inveniotypes.go +++ b/backend/doi/api/inveniotypes.go @@ -5,7 +5,6 @@ package api // InvenioRecordResponse is the representation of a record stored in InvenioRDM type InvenioRecordResponse struct { Links InvenioRecordResponseLinks `json:"links"` - // Metadata InvenioRecordMetadata `json:"metadata"` } // InvenioRecordResponseLinks represents a record's links diff --git a/backend/doi/doi.go b/backend/doi/doi.go index af734737f47c2..ebe22c7cda252 100644 --- a/backend/doi/doi.go +++ b/backend/doi/doi.go @@ -73,6 +73,15 @@ The DOI provider can be set when rclone does not automatically recognize a suppo }}, Required: false, Advanced: true, + }, { + Name: "doi_resolver_api_url", + Help: `The URL of the DOI resolver API to use. + +The DOI resolver can be set for testing or for cases when the the canonical DOI resolver API cannot be used. + +Defaults to "https://doi.org/api".`, + Required: false, + Advanced: true, }}, } fs.Register(fsi) @@ -92,8 +101,9 @@ const ( // Options defines the configuration for this backend type Options struct { - Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset - Provider string `config:"provider"` // The DOI provider + Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset + Provider string `config:"provider"` // The DOI provider + DoiResolverAPIURL string `config:"doi_resolver_api_url"` // The URL of the DOI resolver API to use. 
} // Fs stores the interface to the remote HTTP files @@ -144,12 +154,17 @@ func parseDoi(doi string) string { // Resolve a DOI to a URL // Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation func resolveDoiURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (doiURL *url.URL, err error) { + resolverURL := opt.DoiResolverAPIURL + if resolverURL == "" { + resolverURL = doiResolverAPIURL + } + var result api.DoiResolverResponse params := url.Values{} params.Add("index", "1") opts := rest.Opts{ Method: "GET", - RootURL: doiResolverAPIURL, + RootURL: resolverURL, Path: "/handles/" + opt.Doi, Parameters: params, } diff --git a/backend/doi/doi_internal_test.go b/backend/doi/doi_internal_test.go index 2a5004eb37780..91ecb2444f223 100644 --- a/backend/doi/doi_internal_test.go +++ b/backend/doi/doi_internal_test.go @@ -1,11 +1,28 @@ package doi import ( + "context" + "crypto/md5" + "encoding/hex" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "net/url" + "sort" + "strings" "testing" + "time" + "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs/config/configmap" + "github.com/rclone/rclone/fs/hash" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +var remoteName = "TestDoi" + func TestParseDoi(t *testing.T) { // 10.1000/182 -> 10.1000/182 doi := "10.1000/182" @@ -32,3 +49,201 @@ func TestParseDoi(t *testing.T) { parsed = parseDoi(doi) assert.Equal(t, "10.1000/182", parsed) } + +// prepareMockDoiResolverServer prepares a test server to resolve DOIs +func prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolverApiUrl string) { + mux := http.NewServeMux() + + // Handle requests for resolving DOIs + mux.HandleFunc("GET /api/handles/{handle...}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are resolving a DOI + handle := strings.TrimPrefix(r.URL.Path, "/api/handles/") + assert.NotEmpty(t, handle) + index := 
r.URL.Query().Get("index") + assert.Equal(t, "1", index) + + // Return the most basic response + result := api.DoiResolverResponse{ + ResponseCode: 1, + Handle: handle, + Values: []api.DoiResolverResponseValue{ + { + Index: 1, + Type: "URL", + Data: api.DoiResolverResponseValueData{ + Format: "string", + Value: resolvedURL, + }, + }, + }, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + w.Write(resultBytes) + }) + + // Make the test server + ts := httptest.NewServer(mux) + + // Close the server at the end of the test + t.Cleanup(ts.Close) + + return ts.URL + "/api" +} + +func md5Sum(text string) string { + hash := md5.Sum([]byte(text)) + return hex.EncodeToString(hash[:]) +} + +// prepareMockZenodoServer prepares a test server that mocks Zenodo.org +func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Server { + mux := http.NewServeMux() + + // Handle requests for a single record + mux.HandleFunc("GET /api/records/{recordID...}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are returning data about a single record + recordID := strings.TrimPrefix(r.URL.Path, "/api/records/") + assert.NotEmpty(t, recordID) + + // Return the most basic response + selfURL, err := url.Parse("http://" + r.Host) + require.NoError(t, err) + selfURL = selfURL.JoinPath(r.URL.String()) + result := api.InvenioRecordResponse{ + Links: api.InvenioRecordResponseLinks{ + Self: selfURL.String(), + }, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + w.Write(resultBytes) + }) + // Handle requests for listing files in a record + mux.HandleFunc("GET /api/records/{record}/files", func(w http.ResponseWriter, r *http.Request) { + // Return the most basic response + filesBaseURL, err := url.Parse("http://" + r.Host) + require.NoError(t, err) + filesBaseURL = filesBaseURL.JoinPath("/api/files/") + + 
entries := []api.InvenioFilesResponseEntry{} + for filename, contents := range files { + entries = append(entries, + api.InvenioFilesResponseEntry{ + Key: filename, + Checksum: md5Sum(contents), + Size: int64(len(contents)), + Updated: time.Now().UTC().Format(time.RFC3339), + MimeType: "text/plain", + Links: api.InvenioFilesResponseEntryLinks{ + Content: filesBaseURL.JoinPath(filename).String(), + }, + }, + ) + } + + result := api.InvenioFilesResponse{ + Entries: entries, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + w.Write(resultBytes) + }) + // Handle requests for file contents + mux.HandleFunc("/api/files/{file}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are returning the contents of a file + filename := strings.TrimPrefix(r.URL.Path, "/api/files/") + assert.NotEmpty(t, filename) + contents, found := files[filename] + if !found { + w.WriteHeader(404) + return + } + + // Return the most basic response + w.Write([]byte(contents)) + }) + + // Make the test server + ts := httptest.NewServer(mux) + + // Close the server at the end of the test + t.Cleanup(ts.Close) + + return ts +} + +func TestZenodoRemote(t *testing.T) { + recordID := "2600782" + doi := "10.5281/zenodo.2600782" + + // The files in the dataset + files := map[string]string{ + "README.md": "This is a dataset.", + "data.txt": "Some data", + } + + ts := prepareMockZenodoServer(t, files) + resolvedURL := ts.URL + "/record/" + recordID + + doiResolverApiUrl := prepareMockDoiResolverServer(t, resolvedURL) + + testConfig := configmap.Simple{ + "type": "doi", + "doi": doi, + "provider": "zenodo", + "doi_resolver_api_url": doiResolverApiUrl, + } + fs, err := NewFs(context.Background(), remoteName, "", testConfig) + require.NoError(t, err) + + // Test listing the DOI files + entries, err := fs.List(context.Background(), "") + require.NoError(t, err) + + sort.Sort(entries) + + require.Equal(t, len(files), 
len(entries)) + + e := entries[0] + assert.Equal(t, "README.md", e.Remote()) + assert.Equal(t, int64(18), e.Size()) + _, ok := e.(*Object) + assert.True(t, ok) + + e = entries[1] + assert.Equal(t, "data.txt", e.Remote()) + assert.Equal(t, int64(9), e.Size()) + _, ok = e.(*Object) + assert.True(t, ok) + + // Test reading the DOI files + o, err := fs.NewObject(context.Background(), "README.md") + require.NoError(t, err) + assert.Equal(t, int64(18), o.Size()) + md5Hash, err := o.Hash(context.Background(), hash.MD5) + require.NoError(t, err) + assert.Equal(t, "464352b1cab5240e44528a56fda33d9d", md5Hash) + fd, err := o.Open(context.Background()) + require.NoError(t, err) + data, err := io.ReadAll(fd) + require.NoError(t, err) + require.NoError(t, fd.Close()) + assert.Equal(t, []byte(files["README.md"]), data) + + o, err = fs.NewObject(context.Background(), "data.txt") + require.NoError(t, err) + assert.Equal(t, int64(9), o.Size()) + md5Hash, err = o.Hash(context.Background(), hash.MD5) + require.NoError(t, err) + assert.Equal(t, "5b82f8bf4df2bfb0e66ccaa7306fd024", md5Hash) + fd, err = o.Open(context.Background()) + require.NoError(t, err) + data, err = io.ReadAll(fd) + require.NoError(t, err) + require.NoError(t, fd.Close()) + assert.Equal(t, []byte(files["data.txt"]), data) +} From 78b6e8335e0ca3562e058c6651323038f4e97c73 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 2 Jun 2025 14:17:48 +0200 Subject: [PATCH 2/5] small fixes --- backend/doi/doi_internal_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/backend/doi/doi_internal_test.go b/backend/doi/doi_internal_test.go index 91ecb2444f223..ad3622982d827 100644 --- a/backend/doi/doi_internal_test.go +++ b/backend/doi/doi_internal_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/config/configmap" "github.com/rclone/rclone/fs/hash" "github.com/stretchr/testify/assert" @@ 
-136,7 +137,7 @@ func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Se Checksum: md5Sum(contents), Size: int64(len(contents)), Updated: time.Now().UTC().Format(time.RFC3339), - MimeType: "text/plain", + MimeType: "text/plain; charset=utf-8", Links: api.InvenioFilesResponseEntryLinks{ Content: filesBaseURL.JoinPath(filename).String(), }, @@ -197,11 +198,11 @@ func TestZenodoRemote(t *testing.T) { "provider": "zenodo", "doi_resolver_api_url": doiResolverApiUrl, } - fs, err := NewFs(context.Background(), remoteName, "", testConfig) + f, err := NewFs(context.Background(), remoteName, "", testConfig) require.NoError(t, err) // Test listing the DOI files - entries, err := fs.List(context.Background(), "") + entries, err := f.List(context.Background(), "") require.NoError(t, err) sort.Sort(entries) @@ -221,7 +222,7 @@ func TestZenodoRemote(t *testing.T) { assert.True(t, ok) // Test reading the DOI files - o, err := fs.NewObject(context.Background(), "README.md") + o, err := f.NewObject(context.Background(), "README.md") require.NoError(t, err) assert.Equal(t, int64(18), o.Size()) md5Hash, err := o.Hash(context.Background(), hash.MD5) @@ -233,8 +234,11 @@ func TestZenodoRemote(t *testing.T) { require.NoError(t, err) require.NoError(t, fd.Close()) assert.Equal(t, []byte(files["README.md"]), data) + do, ok := o.(fs.MimeTyper) + require.True(t, ok) + assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background())) - o, err = fs.NewObject(context.Background(), "data.txt") + o, err = f.NewObject(context.Background(), "data.txt") require.NoError(t, err) assert.Equal(t, int64(9), o.Size()) md5Hash, err = o.Hash(context.Background(), hash.MD5) @@ -246,4 +250,7 @@ func TestZenodoRemote(t *testing.T) { require.NoError(t, err) require.NoError(t, fd.Close()) assert.Equal(t, []byte(files["data.txt"]), data) + do, ok = o.(fs.MimeTyper) + require.True(t, ok) + assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background())) } From 
1d9657a37898e115258d2cf07fdb62957e3fe428 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 4 Jun 2025 13:02:41 +0200 Subject: [PATCH 3/5] refactor: use doiProvider interface --- backend/doi/dataverse.go | 77 +++++++++++++---------------- backend/doi/doi.go | 104 +++++++++++++++++++++++++++------------ backend/doi/invenio.go | 49 +++++++++--------- docs/content/doi.md | 26 ++++++++++ 4 files changed, 160 insertions(+), 96 deletions(-) diff --git a/backend/doi/dataverse.go b/backend/doi/dataverse.go index 20e5afbc70e88..d4440c6973f5e 100644 --- a/backend/doi/dataverse.go +++ b/backend/doi/dataverse.go @@ -35,48 +35,35 @@ func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint return Dataverse, endpointURL, nil } -// Implements Fs.List() for Dataverse installations -func (f *Fs) listDataverse(ctx context.Context, dir string) (entries fs.DirEntries, err error) { - fileEntries, err := f.listDataverseDoiFiles(ctx) - if err != nil { - return nil, fmt.Errorf("error listing %q: %w", dir, err) - } +// dataverseProvider implements the doiProvider interface for Dataverse installations +type dataverseProvider struct { + f *Fs +} + +// CanHaveSubDirs is true when the remote can have subdirectories +func (dp *dataverseProvider) CanHaveSubDirs() bool { + return true +} - fullDir := path.Join(f.root, dir) - if fullDir != "" { - fullDir += "/" +// IsFile returns true if remote is a file +func (dp *dataverseProvider) IsFile(ctx context.Context, remote string) (isFile bool, err error) { + entries, err := dp.ListEntries(ctx) + if err != nil { + return false, err } - dirPaths := map[string]bool{} - for _, entry := range fileEntries { - // First, filter out files not in `fullDir` - if !strings.HasPrefix(entry.remote, fullDir) { - continue - } - // Then, find entries in subfolers - remotePath := entry.remote - if fullDir != "" { - remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/") - } - parts := strings.SplitN(remotePath, "/", 2) - 
if len(parts) == 1 { - newEntry := *entry - newEntry.remote = path.Join(dir, remotePath) - entries = append(entries, &newEntry) - } else { - dirPaths[path.Join(dir, parts[0])] = true + for _, entry := range entries { + if entry.remote == remote { + isFile = true + break } } - for dirPath := range dirPaths { - entry := fs.NewDir(dirPath, time.Time{}) - entries = append(entries, entry) - } - return entries, nil + return isFile, nil } -// List the files contained in the DOI -func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err error) { +// ListEntries returns the full list of entries found at the remote, regardless of root +func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) { // Use the cache if populated - cachedEntries, found := f.cache.GetMaybe("files") + cachedEntries, found := dp.f.cache.GetMaybe("files") if found { parsedEntries, ok := cachedEntries.([]Object) if ok { @@ -88,7 +75,7 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err } } - filesURL := f.endpoint + filesURL := dp.f.endpoint var res *http.Response var result api.DataverseDatasetResponse opts := rest.Opts{ @@ -96,8 +83,8 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err Path: strings.TrimLeft(filesURL.EscapedPath(), "/"), Parameters: filesURL.Query(), } - err = f.pacer.Call(func() (bool, error) { - res, err = f.srv.CallJSON(ctx, &opts, nil, &result) + err = dp.f.pacer.Call(func() (bool, error) { + res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result) return shouldRetry(ctx, res, err) }) if err != nil { @@ -105,16 +92,16 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err } modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime) if modTimeErr != nil { - fs.Logf(f, "error: could not parse last update time %v", modTimeErr) + fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr) modTime = 
timeUnset } for _, file := range result.Data.LatestVersion.Files { contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID) query := url.Values{} query.Add("format", "original") - contentURL := f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()}) + contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()}) entry := &Object{ - fs: f, + fs: dp.f, remote: path.Join(file.DirectoryLabel, file.DataFile.Filename), contentURL: contentURL.String(), size: file.DataFile.FileSize, @@ -134,6 +121,12 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err for _, entry := range entries { cacheEntries = append(cacheEntries, *entry) } - f.cache.Put("files", cacheEntries) + dp.f.cache.Put("files", cacheEntries) return entries, nil } + +func newDataverseProvider(f *Fs) doiProvider { + return &dataverseProvider{ + f: f, + } +} diff --git a/backend/doi/doi.go b/backend/doi/doi.go index ebe22c7cda252..25a030c0be181 100644 --- a/backend/doi/doi.go +++ b/backend/doi/doi.go @@ -111,6 +111,7 @@ type Fs struct { name string // name of this remote root string // the path we are working on provider Provider // the DOI provider + doiProvider doiProvider // the interface used to interact with the DOI provider features *fs.Features // optional features opt Options // options for this backend ci *fs.ConfigInfo // global config @@ -132,6 +133,16 @@ type Object struct { md5 string // MD5 hash of the object content } +// doiProvider is the interface used to list objects in a DOI +type doiProvider interface { + // CanHaveSubDirs is true when the remote can have subdirectories + CanHaveSubDirs() bool + // IsFile returns true if remote is a file + IsFile(ctx context.Context, remote string) (isFile bool, err error) + // ListEntries returns the full list of entries found at the remote, regardless of root + ListEntries(ctx context.Context) (entries []*Object, err error) +} + // Parse 
the input string as a DOI // Examples: // 10.1000/182 -> 10.1000/182 @@ -240,24 +251,17 @@ func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err f.provider = provider f.opt.Provider = string(provider) - // Determine if the root is a file switch f.provider { case Dataverse: - entries, err := f.listDataverseDoiFiles(ctx) - if err != nil { - return false, err - } - for _, entry := range entries { - if entry.remote == f.root { - isFile = true - break - } - } + f.doiProvider = newDataverseProvider(f) case Invenio, Zenodo: - isFile = f.root != "" + f.doiProvider = newInvenioProvider(f) + default: + return false, fmt.Errorf("provider type '%s' not supported", f.provider) } - return isFile, nil + // Determine if the root is a file + return f.doiProvider.IsFile(ctx, f.root) } // retryErrorCodes is a slice of error codes that we will retry @@ -270,8 +274,8 @@ var retryErrorCodes = []int{ 509, // Bandwidth Limit Exceeded } -// shouldRetry returns a boolean as to whether this resp and err -// deserve to be retried. It returns the err as a convenience +// shouldRetry returns a boolean as to whether this res and err +// deserve to be retried. It returns the err as a convenience. 
func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) { if fserrors.ContextError(ctx, &err) { return false, err @@ -373,16 +377,7 @@ func (f *Fs) Rmdir(ctx context.Context, dir string) error { // NewObject creates a new remote http file object func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { - var entries []*Object - var err error - switch f.provider { - case Dataverse: - entries, err = f.listDataverseDoiFiles(ctx) - case Invenio, Zenodo: - entries, err = f.listInvevioDoiFiles(ctx) - default: - err = fmt.Errorf("provider type '%s' not supported", f.provider) - } + entries, err := f.doiProvider.ListEntries(ctx) if err != nil { return nil, err } @@ -406,14 +401,59 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { // This should return ErrDirNotFound if the directory isn't // found. func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { - switch f.provider { - case Dataverse: - return f.listDataverse(ctx, dir) - case Invenio, Zenodo: - return f.listInvenio(ctx, dir) - default: - return nil, fmt.Errorf("provider type '%s' not supported", f.provider) + if f.doiProvider.CanHaveSubDirs() { + fileEntries, err := f.doiProvider.ListEntries(ctx) + if err != nil { + return nil, fmt.Errorf("error listing %q: %w", dir, err) + } + + fullDir := path.Join(f.root, dir) + if fullDir != "" { + fullDir += "/" + } + dirPaths := map[string]bool{} + for _, entry := range fileEntries { + // First, filter out files not in `fullDir` + if !strings.HasPrefix(entry.remote, fullDir) { + continue + } + // Then, find entries in subfolers + remotePath := entry.remote + if fullDir != "" { + remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/") + } + parts := strings.SplitN(remotePath, "/", 2) + if len(parts) == 1 { + newEntry := *entry + newEntry.remote = path.Join(dir, remotePath) + entries = append(entries, &newEntry) + } else { + 
dirPaths[path.Join(dir, parts[0])] = true + } + } + for dirPath := range dirPaths { + entry := fs.NewDir(dirPath, time.Time{}) + entries = append(entries, entry) + } + return entries, nil } + + if !f.doiProvider.CanHaveSubDirs() { + if dir != "" { + return nil, fs.ErrorDirNotFound + } + + fileEntries, err := f.doiProvider.ListEntries(ctx) + if err != nil { + return nil, fmt.Errorf("error listing %q: %w", dir, err) + } + for _, entry := range fileEntries { + entries = append(entries, entry) + } + return entries, nil + } + + return nil, fmt.Errorf("provider type '%s' not supported", f.provider) } // Put in to the remote path with the modTime given of the given size diff --git a/backend/doi/invenio.go b/backend/doi/invenio.go index e6d1c8f073de4..94a584156a30f 100644 --- a/backend/doi/invenio.go +++ b/backend/doi/invenio.go @@ -98,26 +98,25 @@ func checkInvenioAPIURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, return url.Parse(result.Links.Self) } -// Implements Fs.List() for Invenio -func (f *Fs) listInvenio(ctx context.Context, dir string) (entries fs.DirEntries, err error) { - if dir != "" { - return nil, fs.ErrorDirNotFound - } +// invenioProvider implements the doiProvider interface for InvenioRDM installations +type invenioProvider struct { + f *Fs +} - fileEntries, err := f.listInvevioDoiFiles(ctx) - if err != nil { - return nil, fmt.Errorf("error listing %q: %w", dir, err) - } - for _, entry := range fileEntries { - entries = append(entries, entry) - } - return entries, nil +// CanHaveSubDirs is true when the remote can have subdirectories +func (ip *invenioProvider) CanHaveSubDirs() bool { + return false +} + +// IsFile returns true if remote is a file +func (ip *invenioProvider) IsFile(ctx context.Context, remote string) (isFile bool, err error) { + return remote != "", nil } -// List the files contained in the DOI -func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err error) { +// ListEntries returns the full list of 
entries found at the remote, regardless of root +func (ip *invenioProvider) ListEntries(ctx context.Context) (entries []*Object, err error) { // Use the cache if populated - cachedEntries, found := f.cache.GetMaybe("files") + cachedEntries, found := ip.f.cache.GetMaybe("files") if found { parsedEntries, ok := cachedEntries.([]Object) if ok { @@ -129,14 +128,14 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er } } - filesURL := f.endpoint.JoinPath("files") + filesURL := ip.f.endpoint.JoinPath("files") var result api.InvenioFilesResponse opts := rest.Opts{ Method: "GET", Path: strings.TrimLeft(filesURL.EscapedPath(), "/"), } - err = f.pacer.Call(func() (bool, error) { - res, err := f.srv.CallJSON(ctx, &opts, nil, &result) + err = ip.f.pacer.Call(func() (bool, error) { + res, err := ip.f.srv.CallJSON(ctx, &opts, nil, &result) return shouldRetry(ctx, res, err) }) if err != nil { @@ -145,11 +144,11 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er for _, file := range result.Entries { modTime, modTimeErr := time.Parse(time.RFC3339, file.Updated) if modTimeErr != nil { - fs.Logf(f, "error: could not parse last update time %v", modTimeErr) + fs.Logf(ip.f, "error: could not parse last update time %v", modTimeErr) modTime = timeUnset } entry := &Object{ - fs: f, + fs: ip.f, remote: file.Key, contentURL: file.Links.Content, size: file.Size, @@ -164,6 +163,12 @@ func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err er for _, entry := range entries { cacheEntries = append(cacheEntries, *entry) } - f.cache.Put("files", cacheEntries) + ip.f.cache.Put("files", cacheEntries) return entries, nil } + +func newInvenioProvider(f *Fs) doiProvider { + return &invenioProvider{ + f: f, + } +} diff --git a/docs/content/doi.md b/docs/content/doi.md index f608f727a7a81..1d8e20dbe2013 100644 --- a/docs/content/doi.md +++ b/docs/content/doi.md @@ -105,6 +105,32 @@ Properties: - "invenio" - Invenio +#### 
--doi-doi-resolver-api-url + +The URL of the DOI resolver API to use. + +The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used. + +Defaults to "https://doi.org/api". + +Properties: + +- Config: doi_resolver_api_url +- Env Var: RCLONE_DOI_DOI_RESOLVER_API_URL +- Type: string +- Required: false + +#### --doi-description + +Description of the remote. + +Properties: + +- Config: description +- Env Var: RCLONE_DOI_DESCRIPTION +- Type: string +- Required: false + ## Backend commands Here are the commands specific to the doi backend. From 35317e9db012abdb0cbea76732d9bda22ae6cdee Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 4 Jun 2025 13:08:47 +0200 Subject: [PATCH 4/5] cleanup --- backend/doi/doi.go | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/backend/doi/doi.go b/backend/doi/doi.go index 25a030c0be181..8bfd3ffe24537 100644 --- a/backend/doi/doi.go +++ b/backend/doi/doi.go @@ -401,12 +401,20 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { // This should return ErrDirNotFound if the directory isn't // found. 
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { - if f.doiProvider.CanHaveSubDirs() { - fileEntries, err := f.doiProvider.ListEntries(ctx) - if err != nil { - return nil, fmt.Errorf("error listing %q: %w", dir, err) - } + if !f.doiProvider.CanHaveSubDirs() && dir != "" { + return nil, fs.ErrorDirNotFound + } + + fileEntries, err := f.doiProvider.ListEntries(ctx) + if err != nil { + return nil, fmt.Errorf("error listing %q: %w", dir, err) + } + if !f.doiProvider.CanHaveSubDirs() { + for _, entry := range fileEntries { + entries = append(entries, entry) + } + } else { fullDir := path.Join(f.root, dir) if fullDir != "" { fullDir += "/" @@ -435,25 +443,9 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e entry := fs.NewDir(dirPath, time.Time{}) entries = append(entries, entry) } - return entries, nil - } - - if !f.doiProvider.CanHaveSubDirs() { - if dir != "" { - return nil, fs.ErrorDirNotFound - } - - fileEntries, err := f.doiProvider.ListEntries(ctx) - if err != nil { - return nil, fmt.Errorf("error listing %q: %w", dir, err) - } - for _, entry := range fileEntries { - entries = append(entries, entry) - } - return entries, nil } - return nil, fmt.Errorf("provider type '%s' not supported", f.provider) + return entries, nil } // Put in to the remote path with the modTime given of the given size From e3f8f2a29330ae0481c9c2a3797a7e904be644fe Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 4 Jun 2025 13:13:55 +0200 Subject: [PATCH 5/5] lint --- backend/doi/doi_internal_test.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/backend/doi/doi_internal_test.go b/backend/doi/doi_internal_test.go index ad3622982d827..22046be098d4f 100644 --- a/backend/doi/doi_internal_test.go +++ b/backend/doi/doi_internal_test.go @@ -52,7 +52,7 @@ func TestParseDoi(t *testing.T) { } // prepareMockDoiResolverServer prepares a test server to resolve DOIs -func 
prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolverApiUrl string) { +func prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolverAPIURL string) { mux := http.NewServeMux() // Handle requests for resolving DOIs @@ -81,7 +81,8 @@ func prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolver resultBytes, err := json.Marshal(result) require.NoError(t, err) w.Header().Add("Content-Type", "application/json") - w.Write(resultBytes) + _, err = w.Write(resultBytes) + require.NoError(t, err) }) // Make the test server @@ -120,7 +121,8 @@ func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Se resultBytes, err := json.Marshal(result) require.NoError(t, err) w.Header().Add("Content-Type", "application/json") - w.Write(resultBytes) + _, err = w.Write(resultBytes) + require.NoError(t, err) }) // Handle requests for listing files in a record mux.HandleFunc("GET /api/records/{record}/files", func(w http.ResponseWriter, r *http.Request) { @@ -151,7 +153,8 @@ func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Se resultBytes, err := json.Marshal(result) require.NoError(t, err) w.Header().Add("Content-Type", "application/json") - w.Write(resultBytes) + _, err = w.Write(resultBytes) + require.NoError(t, err) }) // Handle requests for file contents mux.HandleFunc("/api/files/{file}", func(w http.ResponseWriter, r *http.Request) { @@ -165,7 +168,8 @@ func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Se } // Return the most basic response - w.Write([]byte(contents)) + _, err := w.Write([]byte(contents)) + require.NoError(t, err) }) // Make the test server @@ -190,13 +194,13 @@ func TestZenodoRemote(t *testing.T) { ts := prepareMockZenodoServer(t, files) resolvedURL := ts.URL + "/record/" + recordID - doiResolverApiUrl := prepareMockDoiResolverServer(t, resolvedURL) + doiResolverAPIURL := prepareMockDoiResolverServer(t, resolvedURL) testConfig := 
configmap.Simple{ "type": "doi", "doi": doi, "provider": "zenodo", - "doi_resolver_api_url": doiResolverApiUrl, + "doi_resolver_api_url": doiResolverAPIURL, } f, err := NewFs(context.Background(), remoteName, "", testConfig) require.NoError(t, err)