Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion backend/doi/api/inveniotypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package api
// InvenioRecordResponse is the representation of a record stored in InvenioRDM
type InvenioRecordResponse struct {
Links InvenioRecordResponseLinks `json:"links"`
// Metadata InvenioRecordMetadata `json:"metadata"`
}

// InvenioRecordResponseLinks represents a record's links
Expand Down
77 changes: 35 additions & 42 deletions backend/doi/dataverse.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,48 +35,35 @@ func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint
return Dataverse, endpointURL, nil
}

// Implements Fs.List() for Dataverse installations
func (f *Fs) listDataverse(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
fileEntries, err := f.listDataverseDoiFiles(ctx)
if err != nil {
return nil, fmt.Errorf("error listing %q: %w", dir, err)
}
// dataverseProvider implements the doiProvider interface for Dataverse installations
type dataverseProvider struct {
	f *Fs // parent Fs; provides the endpoint, pacer, HTTP client, and entry cache used when listing
}

// CanHaveSubDirs is true when the remote can have subdirectories.
// Dataverse files carry a directory label that is joined into the
// remote path, so a dataset can always contain subdirectories.
func (dp *dataverseProvider) CanHaveSubDirs() bool {
	return true
}

fullDir := path.Join(f.root, dir)
if fullDir != "" {
fullDir += "/"
// IsFile returns true if remote is a file
func (dp *dataverseProvider) IsFile(ctx context.Context, remote string) (isFile bool, err error) {
entries, err := dp.ListEntries(ctx)
if err != nil {
return false, err
}
dirPaths := map[string]bool{}
for _, entry := range fileEntries {
// First, filter out files not in `fullDir`
if !strings.HasPrefix(entry.remote, fullDir) {
continue
}
// Then, find entries in subfolders
remotePath := entry.remote
if fullDir != "" {
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
}
parts := strings.SplitN(remotePath, "/", 2)
if len(parts) == 1 {
newEntry := *entry
newEntry.remote = path.Join(dir, remotePath)
entries = append(entries, &newEntry)
} else {
dirPaths[path.Join(dir, parts[0])] = true
for _, entry := range entries {
if entry.remote == remote {
isFile = true
break
}
}
for dirPath := range dirPaths {
entry := fs.NewDir(dirPath, time.Time{})
entries = append(entries, entry)
}
return entries, nil
return isFile, nil
}

// List the files contained in the DOI
func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err error) {
// ListEntries returns the full list of entries found at the remote, regardless of root
func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
// Use the cache if populated
cachedEntries, found := f.cache.GetMaybe("files")
cachedEntries, found := dp.f.cache.GetMaybe("files")
if found {
parsedEntries, ok := cachedEntries.([]Object)
if ok {
Expand All @@ -88,33 +75,33 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err
}
}

filesURL := f.endpoint
filesURL := dp.f.endpoint
var res *http.Response
var result api.DataverseDatasetResponse
opts := rest.Opts{
Method: "GET",
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
Parameters: filesURL.Query(),
}
err = f.pacer.Call(func() (bool, error) {
res, err = f.srv.CallJSON(ctx, &opts, nil, &result)
err = dp.f.pacer.Call(func() (bool, error) {
res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result)
return shouldRetry(ctx, res, err)
})
if err != nil {
return nil, fmt.Errorf("readDir failed: %w", err)
}
modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
if modTimeErr != nil {
fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr)
modTime = timeUnset
}
for _, file := range result.Data.LatestVersion.Files {
contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
query := url.Values{}
query.Add("format", "original")
contentURL := f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
entry := &Object{
fs: f,
fs: dp.f,
remote: path.Join(file.DirectoryLabel, file.DataFile.Filename),
contentURL: contentURL.String(),
size: file.DataFile.FileSize,
Expand All @@ -134,6 +121,12 @@ func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err
for _, entry := range entries {
cacheEntries = append(cacheEntries, *entry)
}
f.cache.Put("files", cacheEntries)
dp.f.cache.Put("files", cacheEntries)
return entries, nil
}

// newDataverseProvider returns a doiProvider that lists and resolves
// files via the Dataverse native API, backed by the given Fs.
func newDataverseProvider(f *Fs) doiProvider {
	return &dataverseProvider{f: f}
}
117 changes: 82 additions & 35 deletions backend/doi/doi.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ The DOI provider can be set when rclone does not automatically recognize a suppo
}},
Required: false,
Advanced: true,
}, {
Name: "doi_resolver_api_url",
Help: `The URL of the DOI resolver API to use.

The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used.

Defaults to "https://doi.org/api".`,
Required: false,
Advanced: true,
}},
}
fs.Register(fsi)
Expand All @@ -92,15 +101,17 @@ const (

// Options defines the configuration for this backend
type Options struct {
Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset
Provider string `config:"provider"` // The DOI provider
Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset
Provider string `config:"provider"` // The DOI provider
DoiResolverAPIURL string `config:"doi_resolver_api_url"` // The URL of the DOI resolver API to use.
}

// Fs stores the interface to the remote HTTP files
type Fs struct {
name string // name of this remote
root string // the path we are working on
provider Provider // the DOI provider
doiProvider doiProvider // the interface used to interact with the DOI provider
features *fs.Features // optional features
opt Options // options for this backend
ci *fs.ConfigInfo // global config
Expand All @@ -122,6 +133,16 @@ type Object struct {
md5 string // MD5 hash of the object content
}

// doiProvider is the interface used to list objects in a DOI.
// A concrete implementation (e.g. Dataverse, or Invenio/Zenodo) is
// selected once the DOI has been resolved to a provider.
type doiProvider interface {
	// CanHaveSubDirs is true when the remote can have subdirectories
	CanHaveSubDirs() bool
	// IsFile returns true if remote is a file
	IsFile(ctx context.Context, remote string) (isFile bool, err error)
	// ListEntries returns the full list of entries found at the remote, regardless of root
	ListEntries(ctx context.Context) (entries []*Object, err error)
}

// Parse the input string as a DOI
// Examples:
// 10.1000/182 -> 10.1000/182
Expand All @@ -144,12 +165,17 @@ func parseDoi(doi string) string {
// Resolve a DOI to a URL
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
func resolveDoiURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (doiURL *url.URL, err error) {
resolverURL := opt.DoiResolverAPIURL
if resolverURL == "" {
resolverURL = doiResolverAPIURL
}

var result api.DoiResolverResponse
params := url.Values{}
params.Add("index", "1")
opts := rest.Opts{
Method: "GET",
RootURL: doiResolverAPIURL,
RootURL: resolverURL,
Path: "/handles/" + opt.Doi,
Parameters: params,
}
Expand Down Expand Up @@ -225,24 +251,17 @@ func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err
f.provider = provider
f.opt.Provider = string(provider)

// Determine if the root is a file
switch f.provider {
case Dataverse:
entries, err := f.listDataverseDoiFiles(ctx)
if err != nil {
return false, err
}
for _, entry := range entries {
if entry.remote == f.root {
isFile = true
break
}
}
f.doiProvider = newDataverseProvider(f)
case Invenio, Zenodo:
isFile = f.root != ""
f.doiProvider = newInvenioProvider(f)
default:
return false, fmt.Errorf("provider type '%s' not supported", f.provider)
}

return isFile, nil
// Determine if the root is a file
return f.doiProvider.IsFile(ctx, f.root)
}

// retryErrorCodes is a slice of error codes that we will retry
Expand All @@ -255,8 +274,8 @@ var retryErrorCodes = []int{
509, // Bandwidth Limit Exceeded
}

// shouldRetry returns a boolean as to whether this resp and err
// deserve to be retried. It returns the err as a convenience
// shouldRetry returns a boolean as to whether this res and err
// deserve to be retried. It returns the err as a convenience.
func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) {
if fserrors.ContextError(ctx, &err) {
return false, err
Expand Down Expand Up @@ -358,16 +377,7 @@ func (f *Fs) Rmdir(ctx context.Context, dir string) error {

// NewObject creates a new remote http file object
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
var entries []*Object
var err error
switch f.provider {
case Dataverse:
entries, err = f.listDataverseDoiFiles(ctx)
case Invenio, Zenodo:
entries, err = f.listInvevioDoiFiles(ctx)
default:
err = fmt.Errorf("provider type '%s' not supported", f.provider)
}
entries, err := f.doiProvider.ListEntries(ctx)
if err != nil {
return nil, err
}
Expand All @@ -391,14 +401,51 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
// This should return ErrDirNotFound if the directory isn't
// found.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
switch f.provider {
case Dataverse:
return f.listDataverse(ctx, dir)
case Invenio, Zenodo:
return f.listInvenio(ctx, dir)
default:
return nil, fmt.Errorf("provider type '%s' not supported", f.provider)
if !f.doiProvider.CanHaveSubDirs() && dir != "" {
return nil, fs.ErrorDirNotFound
}

fileEntries, err := f.doiProvider.ListEntries(ctx)
if err != nil {
return nil, fmt.Errorf("error listing %q: %w", dir, err)
}

if !f.doiProvider.CanHaveSubDirs() {
for _, entry := range fileEntries {
entries = append(entries, entry)
}
} else {
fullDir := path.Join(f.root, dir)
if fullDir != "" {
fullDir += "/"
}
dirPaths := map[string]bool{}
for _, entry := range fileEntries {
// First, filter out files not in `fullDir`
if !strings.HasPrefix(entry.remote, fullDir) {
continue
}
// Then, find entries in subfolders
remotePath := entry.remote
if fullDir != "" {
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
}
parts := strings.SplitN(remotePath, "/", 2)
if len(parts) == 1 {
newEntry := *entry
newEntry.remote = path.Join(dir, remotePath)
entries = append(entries, &newEntry)
} else {
dirPaths[path.Join(dir, parts[0])] = true
}
}
for dirPath := range dirPaths {
entry := fs.NewDir(dirPath, time.Time{})
entries = append(entries, entry)
}
}

return entries, nil
}

// Put in to the remote path with the modTime given of the given size
Expand Down
Loading