-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdelicious2fluiddb.py
103 lines (95 loc) · 3.26 KB
/
delicious2fluiddb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#
# delicious2fluiddb.py: Import all public bookmarks and their tags
# from del.icio.us and upload them to FluidDB.
#
# Copyright (c) Nicholas J. Radcliffe 2009
#
# See LICENSE for license.
#
import types, sys
import fdb
#import fdbdummy as fdb
from delicious import *
try:
from abouttag.uri import URI
except ImportError:
print 'You need the abouttag library on your PYTHONPATH.'
print 'It is available from https://github.com/njr0/abouttag'
raise
class Entry:
    """A single bookmark record.

    Stores, unchanged, the five values handed to the constructor:
    url, description, tags, shared and extended.
    """

    def __init__(self, url, description, tags, shared, extended):
        self.url, self.description = url, description
        self.tags, self.shared, self.extended = tags, shared, extended

    def __str__(self):
        """Return one 'name: value' line per attribute.

        Names are right-aligned in a 12-character column and listed in
        the fixed order url, description, tags, extended, shared.
        """
        names = ('url', 'description', 'tags', 'extended', 'shared')
        values = vars(self)
        lines = []
        for name in names:
            lines.append('%12s: %s' % (name, str(values[name])))
        return '\n'.join(lines)
def Process(entries, p):
    """Turn an XML string of posts into a list of Entry objects.

    entries -- XML text; it is parsed with ParseXMLString and the first
               child named 'posts' supplies the nodes to convert.
    p       -- accepted for the caller's convenience; not used here.

    Each element child of 'posts' yields one Entry built from its
    'href', 'description', 'extended', 'tag' and 'shared' attributes.
    Attributes that are absent keep their defaults (empty string, empty
    tag list, shared=True).  Every value except the URL is encoded to
    ASCII, silently dropping characters that cannot be represented.
    """
    def _ascii(attribute):
        # Encode the attribute text as ASCII, discarding anything else.
        return attribute.value.encode('ascii', 'ignore')

    doc = ParseXMLString(entries)
    posts = FindFirstNamedChild(doc, 'posts')
    result = []
    for node in posts.childNodes:
        # Only element nodes carry the attributes read below.
        if not isElement(node):
            continue
        url = description = extended = ''
        tags = []
        shared = True
        for name in node.attributes.keys():
            attribute = node.attributes[name]
            if name == 'shared':
                # Anything other than the literal "no" counts as shared.
                shared = _ascii(attribute) != "no"
            elif name == 'tag':
                # Tags arrive as one space-separated string; empty
                # pieces are left in for the caller to filter.
                tags = _ascii(attribute).split(' ')
            elif name == 'extended':
                extended = _ascii(attribute)
            elif name == 'description':
                description = _ascii(attribute)
            elif name == 'href':
                # The URL is kept as-is (not ASCII-encoded).
                url = attribute.value
        result.append(Entry(url, description, tags, shared, extended))
    return result
def GetEntryList(p):
    """Read the XML found via p.cache and return it as Entry objects.

    p -- object whose 'cache' attribute is passed to ReadXML; it is
         also forwarded to Process.
    """
    return Process(ReadXML(p.cache), p)
if __name__ == '__main__':
startAt = 0 if len(sys.argv) < 2 else int(sys.argv[1])
p = GetCredentials()
entries = GetEntryList(p)
nTotal = len(entries)
entries = [e for e in entries if e.shared == True]
nShared = len(entries)
print 'Removed %d private entries' % (nTotal - nShared)
db = fdb.FluidDB()
nURLs = nTags = 0
tagsUsed = set()
for i, entry in enumerate(entries[startAt:]):
if entry.url:
uri = URI(unicode(entry.url)).encode('UTF-8')
print '%4d: Tagging %s as %s:' % (i + startAt, entry.url, uri)
nURLs += 1
else:
print 'Blank URL'
o = db.create_object(uri)
if type (o) == types.IntType: # error
print 'Error occurred, code %d' % o
else:
for tag in [t for t in entry.tags if t]: # no empty tags
error = db.tag_object_by_id (o.id, tag)
print ' %s' % tag,
sys.stdout.flush ()
if error == 0:
nTags += 1
tagsUsed.add (tag)
else:
print '\n ---> FAILURE!'
print
print ('%d URLs tagged in FluidDB, with a total of %d tags (%d distinct)'
% (nURLs, nTags, len (tagsUsed)))
print '\nTags were: %s' % (' '.join ([tag for tag in tagsUsed]))