You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm not sure if this is a problem with the error handling, or if we need to add some kind of retry logic to the Slurm script. We've seen a few jobs fail like this:
Traceback (most recent call last):
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/utils.py", line 84, in _execute
return self.cursor.execute(sql, params)
psycopg2.OperationalError: SSL connection has been closed unexpectedly
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/share/Kive/kive/container/management/commands/runcontainer.py", line 47, in handle
run.save()
File "/usr/local/share/Kive/kive/container/models.py", line 1107, in save
update_fields)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 741, in save
force_update=force_update, update_fields=update_fields)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 779, in save_base
force_update, using, update_fields,
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 851, in _save_table
forced_update)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 900, in _do_update
return filtered._update(values) > 0
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/query.py", line 760, in _update
return query.get_compiler(self.db).execute_sql(CURSOR)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/sql/compiler.py", line 1462, in execute_sql
cursor = super().execute_sql(result_type)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/sql/compiler.py", line 1133, in execute_sql
cursor.execute(sql, params)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/utils.py", line 67, in execute
return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/utils.py", line 76, in _execute_with_wrappers
return executor(sql, params, many, context)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/utils.py", line 84, in _execute
return self.cursor.execute(sql, params)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/utils.py", line 89, in __exit__
raise dj_exc_value.with_traceback(traceback) from exc_value
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/utils.py", line 84, in _execute
return self.cursor.execute(sql, params)
django.db.utils.OperationalError: SSL connection has been closed unexpectedly
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/base/base.py", line 235, in _cursor
return self._prepare_cursor(self.create_cursor(name))
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/postgresql/base.py", line 223, in create_cursor
cursor = self.connection.cursor()
psycopg2.InterfaceError: connection already closed
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/var/spool/slurmd/job110371/slurm_script", line 10, in <module>
execute_from_command_line(sys.argv)
File "/opt/venv_kive/lib/python3.7/site-packages/django/core/management/__init__.py", line 381, in execute_from_command_line
utility.execute()
File "/opt/venv_kive/lib/python3.7/site-packages/django/core/management/__init__.py", line 375, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/opt/venv_kive/lib/python3.7/site-packages/django/core/management/base.py", line 323, in run_from_argv
self.execute(*args, **cmd_options)
File "/opt/venv_kive/lib/python3.7/site-packages/django/core/management/base.py", line 364, in execute
output = self.handle(*args, **options)
File "/usr/local/share/Kive/kive/container/management/commands/runcontainer.py", line 54, in handle
run.save()
File "/usr/local/share/Kive/kive/container/models.py", line 1107, in save
update_fields)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 741, in save
force_update=force_update, update_fields=update_fields)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 779, in save_base
force_update, using, update_fields,
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 851, in _save_table
forced_update)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/base.py", line 900, in _do_update
return filtered._update(values) > 0
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/query.py", line 760, in _update
return query.get_compiler(self.db).execute_sql(CURSOR)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/sql/compiler.py", line 1462, in execute_sql
cursor = super().execute_sql(result_type)
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/models/sql/compiler.py", line 1131, in execute_sql
cursor = self.connection.cursor()
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/base/base.py", line 256, in cursor
return self._cursor()
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/base/base.py", line 235, in _cursor
return self._prepare_cursor(self.create_cursor(name))
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/utils.py", line 89, in __exit__
raise dj_exc_value.with_traceback(traceback) from exc_value
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/base/base.py", line 235, in _cursor
return self._prepare_cursor(self.create_cursor(name))
File "/opt/venv_kive/lib/python3.7/site-packages/django/db/backends/postgresql/base.py", line 223, in create_cursor
cursor = self.connection.cursor()
django.db.utils.InterfaceError: connection already closed
The text was updated successfully, but these errors were encountered:
I'm not sure if this is a problem with the error handling, or if we need to add some kind of retry logic to the Slurm script. We've seen a few jobs fail like this:
The text was updated successfully, but these errors were encountered: