Skip to content

Commit 0ddfd6f

Browse files
author
Matt Hardcastle
committed
Add FLock to lock files another way
The build system uses a class called Singleton to handle file locking. This class has a few potential issues: 1) An instance of Singleton could delete a file owned by another instance. 2) Calling `print` in `__enter__` doesn't write to STDOUT. 3) If the lock file already exists but the process that "owned" it died without deleting it, every process will be locked out. This change is an attempt to resolve these issues and provide some lock debugging code. The code is structured to default to using the current Singleton class unless the USE_FLOCK_CLS environment variable is set. In the event the environment variable is set, the Singleton class is replaced with FLock. The FLock class is designed to work while running on a machine that's also using the real Singleton class. It does this at the expense of not solving issue #1. If FLock is proven successful and completely deployed to the cluster, it should be made the default and the code that deletes the lock file in `__exit__` should be removed.
1 parent 3877a35 commit 0ddfd6f

File tree

1 file changed

+116
-2
lines changed

1 file changed

+116
-2
lines changed

hifi_singleton.py

+116-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import json
2+
import logging
13
import os
24
import platform
35
import time
@@ -7,6 +9,15 @@
79
except ImportError:
810
fcntl = None
911

12+
try:
13+
import msvcrt
14+
except ImportError:
15+
msvcrt = None
16+
17+
18+
logger = logging.getLogger(__name__)
19+
20+
1021
# Used to ensure only one instance of the script runs at a time
1122
class Singleton:
1223
def __init__(self, path):
@@ -33,7 +44,10 @@ def __enter__(self):
3344
else:
3445
self.fh.close()
3546
self.fh = None
36-
print("Couldn't aquire lock, retrying in 10 seconds")
47+
# print is horked here so write directly to stdout.
48+
with open(1, mode="w", closefd=False) as _stdout:
49+
_stdout.write("Couldn't aquire lock, retrying in 10 seconds\n")
50+
_stdout.flush()
3751
time.sleep(10)
3852
return self
3953

@@ -43,4 +57,104 @@ def __exit__(self, type, value, traceback):
4357
else:
4458
fcntl.lockf(self.fh, fcntl.LOCK_UN)
4559
self.fh.close()
46-
os.unlink(self.path)
60+
os.unlink(self.path)
61+
62+
63+
class FLock:
    """
    File locking context manager

    >> with FLock("/tmp/foo.lock"):
    >>     do_something_that_must_be_synced()

    While the lock is held, the lock file contains a small JSON document
    (owner PID plus any environment variables named in FLOCK_ENV_VARS) that
    waiting processes print to stdout for debugging.

    The lock file must stick around forever. The author is not aware of a
    cross-platform way to clean it up without introducing race conditions.
    """

    def __init__(self, path):
        """Open (creating if necessary) the lock file at `path`.

        The descriptor is opened read/write so the same handle can be used
        for locking, for reading another owner's stats and for writing ours.
        """
        self.fh = os.open(path, os.O_CREAT | os.O_RDWR)
        self.path = path

    def _lock_posix(self):
        """Try once to take an exclusive, non-blocking lock via fcntl.

        Returns True when the lock was acquired, False when another process
        holds it.
        """
        try:
            fcntl.lockf(self.fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (BlockingIOError, PermissionError):
            # A contended LOCK_NB request fails with EAGAIN *or* EACCES
            # depending on the platform, so both must be treated as
            # "somebody else has the lock".
            #
            # Windows sleeps for 10 seconds before giving up on a lock.
            # Let's mimic that behavior.
            time.sleep(10)
            return False
        return True

    def _lock_windows(self):
        """Try to lock the first byte of the file via msvcrt.

        Returns True when the lock was acquired, False when msvcrt gave up.
        """
        # msvcrt.locking locks a region starting at the *current* file
        # position. Reads and writes move that position, so always rewind
        # first; otherwise different attempts could lock different bytes and
        # never exclude each other.
        os.lseek(self.fh, 0, os.SEEK_SET)
        try:
            msvcrt.locking(self.fh, msvcrt.LK_LOCK, 1)
        except OSError:
            return False
        return True

    # Pick the platform implementation once, at class-definition time.
    if fcntl is not None:
        _lock = _lock_posix
    elif msvcrt is not None:
        _lock = _lock_windows
    else:
        raise RuntimeError("No locking library found")

    def read_stats(self):
        """Print the current owner's stats (and the lock's age) to stdout."""
        data = {}
        with open(self.fh, mode="r", closefd=False) as stats_file:
            stats_file.seek(0)
            try:
                data = json.loads(stats_file.read())
            except json.decoder.JSONDecodeError:
                logger.warning("couldn't decode json in lock file")
            except PermissionError:
                # Can't read a locked file on Windows :(
                pass

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a bare number or list);
            # the code below needs a mapping.
            logger.warning("couldn't decode json in lock file")
            data = {}

        lock_age = time.time() - os.fstat(self.fh).st_mtime
        if lock_age > 0:
            data["Age"] = "%0.2f" % lock_age

        # Write straight to fd 1 rather than using print.
        with open(1, mode="w", closefd=False) as _stdout:
            _stdout.write("Lock stats:\n")
            for key, value in sorted(data.items()):
                _stdout.write("* %s: %s\n" % (key, value))
            _stdout.flush()

    def write_stats(self):
        """Replace the lock file's contents with this process's stats.

        Returns the number of characters written.
        """
        stats = {
            "Owner PID": os.getpid(),
        }
        flock_env_vars = os.getenv("FLOCK_ENV_VARS")
        if flock_env_vars:
            for env_var_name in flock_env_vars.split(":"):
                stats[env_var_name] = os.getenv(env_var_name)

        with open(self.fh, mode="w", closefd=False) as stats_file:
            # Wrapping an existing descriptor does not truncate or rewind it,
            # and truncate() cuts at the *current* offset. After a contended
            # read_stats() the offset sits at EOF, so rewind explicitly or the
            # new JSON would be appended to the previous owner's payload.
            stats_file.seek(0)
            stats_file.truncate()
            return stats_file.write(json.dumps(stats, indent=2))

    def __enter__(self):
        """Block until the lock is acquired, reporting the owner while waiting."""
        while not self._lock():
            try:
                self.read_stats()
            except (IOError, ValueError):
                logger.exception("couldn't read stats")
            time.sleep(3.33)  # don't hammer the file

        self.write_stats()

        return self

    def __exit__(self, type, value, traceback):
        """Release the lock by closing the descriptor, then remove the file."""
        os.close(self.fh)
        # WARNING: `os.close` gives up the lock on `fh` then we attempt the
        # `os.unlink`. On posix platforms this can lead to us deleting a lock
        # file that another process owns. This step is required to maintain
        # compatibility with Singleton. When and if FLock is completely rolled
        # out to the build fleet this unlink should be removed.
        try:
            os.unlink(self.path)
        except (FileNotFoundError, PermissionError):
            logger.exception("couldn't unlink lock file")
156+
157+
158+
# Opt-in switch: when USE_FLOCK_CLS is present in the environment (with any
# value, even empty), the name `Singleton` is rebound to FLock so existing
# callers pick up the new locker without code changes.
if "USE_FLOCK_CLS" in os.environ:
    logger.warning("Using FLock locker")
    Singleton = FLock

0 commit comments

Comments
 (0)