Skip to content

Commit b2c4523

Browse files
authored
Reduce memory usage of "ExplicitPeakBounds" and "SpectrumMetadata" (#3374)
Add some subclasses of "ImmutableList" which can store some types of data more efficiently. ConstantList: for when all of the elements in the list are identical. IntegerList: for when the elements are integers but their values are in a range that can be represented by fewer than 4 bytes. Factor: for when the list is long but the number of unique values is small, so they can be represented as integer indexes into the list of unique values. NullableList: stored as a list of structs alongside a list of bits.
1 parent ff3d22e commit b2c4523

File tree

14 files changed

+1047
-211
lines changed

14 files changed

+1047
-211
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Original author: Nicholas Shulman <nicksh .at. u.washington.edu>,
3+
* MacCoss Lab, Department of Genome Sciences, UW
4+
*
5+
* Copyright 2025 University of Washington - Seattle, WA
6+
*
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
using System;
21+
using System.Collections.Generic;
22+
using System.Linq;
23+
24+
namespace pwiz.Common.Collections
25+
{
26+
/// <summary>
/// Stores items in a list as integer indexes into a list of possible values.
/// The list of possible values are <see cref="Levels"/>.
/// The indexes into the Levels list are stored in <see cref="LevelIndices"/>
/// which may be stored as bits, bytes, shorts or integers depending on
/// the highest value required.
/// </summary>
public class Factor<T> : ImmutableList<T>
{
    /// <summary>
    /// Builds a Factor from the given items, discovering the levels in order of first appearance.
    /// </summary>
    /// <param name="items">The items to store; must not be null.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="items"/> is null.</exception>
    public static Factor<T> FromItems(IEnumerable<T> items)
    {
        return FromItemsWithLevels(items, ImmutableList<T>.EMPTY);
    }

    /// <summary>
    /// Builds a Factor whose <see cref="Levels"/> begin with <paramref name="startingLevels"/>.
    /// Any values in <paramref name="items"/> which are not among the starting levels are appended
    /// to the levels in order of first appearance.
    /// If <paramref name="items"/> is already a Factor whose levels begin with
    /// <paramref name="startingLevels"/>, that same instance is returned.
    /// </summary>
    /// <param name="items">The items to store; must not be null.</param>
    /// <param name="startingLevels">Levels which occupy the first positions of <see cref="Levels"/>;
    /// must not be null and must not contain duplicates.</param>
    /// <exception cref="ArgumentNullException">If either argument is null.</exception>
    /// <exception cref="ArgumentException">If <paramref name="startingLevels"/> contains duplicate values.</exception>
    public static Factor<T> FromItemsWithLevels(IEnumerable<T> items, ImmutableList<T> startingLevels)
    {
        if (items == null)
        {
            throw new ArgumentNullException(nameof(items));
        }
        if (ReferenceEquals(startingLevels, null))
        {
            throw new ArgumentNullException(nameof(startingLevels));
        }
        if (items is Factor<T> factor && factor.Levels.Take(startingLevels.Count).SequenceEqual(startingLevels))
        {
            return factor;
        }
        // The key is wrapped in a ValueTuple, presumably so that a null item can be
        // used as a dictionary key (Dictionary rejects null keys).
        var levelsDict = new Dictionary<ValueTuple<T>, int>();
        foreach (var level in startingLevels)
        {
            levelsDict.Add(ValueTuple.Create(level), levelsDict.Count);
        }
        var levelIndices = new List<int>();
        foreach (var item in items)
        {
            var key = ValueTuple.Create(item);
            if (!levelsDict.TryGetValue(key, out int levelIndex))
            {
                // First time this value has been seen: it becomes the next level.
                levelIndex = levelsDict.Count;
                levelsDict.Add(key, levelIndex);
            }

            levelIndices.Add(levelIndex);
        }

        ImmutableList<T> levels;
        if (levelsDict.Count == startingLevels.Count)
        {
            // No new levels were discovered, so the caller's list can be reused as-is.
            levels = startingLevels;
        }
        else
        {
            levels = levelsDict.OrderBy(kvp => kvp.Value).Select(kvp => kvp.Key.Item1).ToImmutable();
        }
        return new Factor<T>(levels, IntegerList.FromIntegers(levelIndices));
    }

    /// <summary>
    /// Constructs a Factor directly from its levels and the per-item indexes into those levels.
    /// The indexes are not checked against the number of levels here; an out of range index
    /// only surfaces when the corresponding item is read.
    /// </summary>
    /// <exception cref="ArgumentNullException">If either argument is null.</exception>
    public Factor(ImmutableList<T> levels, ImmutableList<int> levelIndices)
    {
        if (ReferenceEquals(levels, null))
        {
            throw new ArgumentNullException(nameof(levels));
        }
        if (ReferenceEquals(levelIndices, null))
        {
            throw new ArgumentNullException(nameof(levelIndices));
        }
        Levels = levels;
        LevelIndices = levelIndices;
    }

    /// <summary>
    /// The distinct values which the items in this list can take.
    /// </summary>
    public ImmutableList<T> Levels { get; }

    /// <summary>
    /// For each item in this list, the position of its value in <see cref="Levels"/>.
    /// </summary>
    public ImmutableList<int> LevelIndices { get; }

    public override int Count
    {
        get { return LevelIndices.Count; }
    }

    public override IEnumerator<T> GetEnumerator()
    {
        return LevelIndices.Select(i => Levels[i]).GetEnumerator();
    }

    public override T this[int index]
    {
        get
        {
            return Levels[LevelIndices[index]];
        }
    }
}
102+
}

pwiz_tools/Shared/CommonUtil/Collections/ImmutableList.cs

+143-28
Original file line numberDiff line numberDiff line change
@@ -140,18 +140,20 @@ public static ImmutableList<T> Singleton(T value)
140140
}
141141

142142
/// <summary>
/// Hash code computed from the contents of the list by CollectionUtil.GetHashCodeDeep.
/// Sealed so that derived classes cannot change it, which preserves the semantics
/// that ImmutableList instances are equal if and only if they contain the same items.
/// </summary>
public sealed override int GetHashCode() => CollectionUtil.GetHashCodeDeep(this);
153152

154-
public override bool Equals(object o)
153+
/// <summary>
154+
/// Equals implementation which cannot be overridden.
155+
/// </summary>
156+
public sealed override bool Equals(object o)
155157
{
156158
if (o == null)
157159
{
@@ -161,14 +163,35 @@ public override bool Equals(object o)
161163
{
162164
return true;
163165
}
166+
if (GetType() == o.GetType())
167+
{
168+
return SameTypeEquals((ImmutableList<T>)o);
169+
}
164170
var that = o as ImmutableList<T>;
165171
if (null == that)
166172
{
167173
return false;
168174
}
175+
176+
if (Count != that.Count)
177+
{
178+
return false;
179+
}
180+
169181
return this.SequenceEqual(that);
170182
}
171183

184+
/// <summary>
/// Equality check used by Equals only when the other list has exactly the same
/// runtime type as this one. The default compares the two lists element by element.
/// Derived classes may override this when they can compare against other instances
/// of themselves more cheaply (e.g. <see cref="ConstantList{T}"/>).
/// </summary>
protected virtual bool SameTypeEquals(ImmutableList<T> list) => this.SequenceEqual(list);
194+
172195
public abstract int Count { get; }
173196

174197
void ICollection<T>.Add(T item)
@@ -204,10 +227,32 @@ IEnumerator IEnumerable.GetEnumerator()
204227

205228
public abstract IEnumerator<T> GetEnumerator();
206229

207-
public abstract bool Contains(T item);
208-
public abstract void CopyTo(T[] array, int arrayIndex);
230+
/// <summary>
/// Returns true if any element of this list equals <paramref name="item"/> according to
/// <see cref="EqualityComparer{T}.Default"/>.
/// </summary>
public virtual bool Contains(T item)
{
    // Do not delegate to Enumerable.Contains(source, value): it forwards to
    // ICollection<T>.Contains when the source implements ICollection<T>, which for
    // this class is this very method, resulting in unbounded recursion.
    var comparer = EqualityComparer<T>.Default;
    foreach (var element in this)
    {
        if (comparer.Equals(element, item))
        {
            return true;
        }
    }

    return false;
}
234+
/// <summary>
/// Copies the elements of this list, in enumeration order, into <paramref name="array"/>
/// starting at position <paramref name="arrayIndex"/>.
/// </summary>
public virtual void CopyTo(T[] array, int arrayIndex)
{
    using (var enumerator = GetEnumerator())
    {
        int target = arrayIndex;
        while (enumerator.MoveNext())
        {
            array[target] = enumerator.Current;
            target++;
        }
    }
}
241+
242+
/// <summary>
/// Returns the zero-based position of the first element equal to <paramref name="item"/>,
/// or -1 if there is no such element.
/// </summary>
public virtual int IndexOf(T item)
{
    // EqualityComparer<T>.Default avoids boxing each element, which the static
    // object.Equals(object, object) would do for value types.
    var comparer = EqualityComparer<T>.Default;
    int index = 0;
    foreach (var v in this)
    {
        if (comparer.Equals(v, item))
        {
            return index;
        }
        index++;
    }

    return -1;
}
211256
public abstract T this[int index] { get; }
212257

213258
T IList<T>.this[int index]
@@ -216,12 +261,12 @@ T IList<T>.this[int index]
216261
set { throw new InvalidOperationException(); }
217262
}
218263

219-
public abstract ImmutableList<T> ReplaceAt(int index, T value);
220-
221-
/// <summary>
222-
/// Replaces the first item in the list that matches the predicate with the given value; throws IndexOutOfRange if no item is replaced.
223-
/// </summary>
224-
public abstract ImmutableList<T> ReplaceElement(T value, Func<T, bool> predicate);
264+
/// <summary>
/// Returns a new list which has the same elements as this one except that
/// the element at position <paramref name="index"/> is <paramref name="value"/>.
/// This list is not modified.
/// </summary>
public virtual ImmutableList<T> ReplaceAt(int index, T value)
{
    // Work on a copy of the elements so that this instance is left untouched.
    T[] copy = this.ToArray();
    copy[index] = value;
    return ImmutableList.ValueOf(copy);
}
225270

226271
private class Impl : ImmutableList<T>
227272
{
@@ -271,12 +316,6 @@ public override ImmutableList<T> ReplaceAt(int index, T value)
271316
newArray[index] = value;
272317
return new Impl(newArray);
273318
}
274-
275-
public override ImmutableList<T> ReplaceElement(T value, Func<T, bool> predicate)
276-
{
277-
int index = Array.FindIndex(_items, t => predicate(t));
278-
return ReplaceAt(index, value);
279-
}
280319
}
281320

282321
private class SingletonImpl : ImmutableList<T>
@@ -332,14 +371,90 @@ public override ImmutableList<T> ReplaceAt(int index, T value)
332371
}
333372
return new SingletonImpl(value);
334373
}
374+
}
375+
}
335376

336-
public override ImmutableList<T> ReplaceElement(T value, Func<T, bool> predicate)
377+
/// <summary>
/// A list in which every element is the same value, so that only the value
/// and the number of elements need to be stored.
/// </summary>
public class ConstantList<T> : ImmutableList<T>
{
    private readonly int _count;
    private readonly T _value;

    /// <summary>
    /// Creates a list containing <paramref name="count"/> copies of <paramref name="value"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">If <paramref name="count"/> is negative.</exception>
    public ConstantList(int count, T value)
    {
        // Reject a negative count up front instead of failing later during enumeration.
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        _count = count;
        _value = value;
    }

    public override IEnumerator<T> GetEnumerator()
    {
        return Enumerable.Repeat(_value, _count).GetEnumerator();
    }

    public override int Count
    {
        get { return _count; }
    }

    public override T this[int index]
    {
        get
        {
            if (index < 0 || index >= _count)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            return _value;
        }
    }

    /// <summary>
    /// Returns true if the list is non-empty and <paramref name="item"/> equals the repeated value.
    /// Answered without enumerating because all elements are identical.
    /// </summary>
    public override bool Contains(T item)
    {
        return _count > 0 && EqualityComparer<T>.Default.Equals(_value, item);
    }

    /// <summary>
    /// Returns 0 if the list is non-empty and <paramref name="item"/> equals the repeated value,
    /// otherwise -1.
    /// </summary>
    public override int IndexOf(T item)
    {
        return Contains(item) ? 0 : -1;
    }

    /// <summary>
    /// Two ConstantLists are equal when they have the same length and the same repeated value.
    /// </summary>
    protected override bool SameTypeEquals(ImmutableList<T> list)
    {
        var that = (ConstantList<T>) list;
        return Count == list.Count && Equals(_value, that._value);
    }
}
413+
414+
/// <summary>
/// Efficiently stores a list of <see cref="Nullable"/> values as two parallel lists:
/// the underlying values (with the default value standing in for null entries) and a
/// list of 0/1 flags recording which entries have a value.
/// The flags are built with IntegerList.FromIntegers (see <see cref="IntegerList.Bits"/>).
/// </summary>
public class NullableList<T> : ImmutableList<T?> where T : struct
{
    private ImmutableList<int> _hasValueList;
    private ImmutableList<T> _values;

    /// <summary>
    /// Splits <paramref name="items"/> into the flag list and the value list.
    /// </summary>
    public NullableList(IEnumerable<T?> items)
    {
        var flags = new List<int>();
        var payload = new List<T>();
        foreach (T? candidate in items)
        {
            flags.Add(candidate.HasValue ? 1 : 0);
            payload.Add(candidate.GetValueOrDefault());
        }

        _hasValueList = IntegerList.FromIntegers(flags);
        if (typeof(T) != typeof(int))
        {
            _values = payload.ToImmutable();
        }
        else
        {
            // When T is int the values themselves can also go through IntegerList.
            _values = (ImmutableList<T>)(object)IntegerList.FromIntegers((List<int>)(object) payload);
        }
    }

    public override int Count
    {
        get { return _values.Count; }
    }

    public override IEnumerator<T?> GetEnumerator()
    {
        int total = Count;
        for (int position = 0; position < total; position++)
        {
            yield return this[position];
        }
    }

    public override T? this[int index]
    {
        get
        {
            // A flag of 0 marks an entry which was null in the original sequence.
            if (_hasValueList[index] == 0)
            {
                return null;
            }

            return _values[index];
        }
    }
}

0 commit comments

Comments
 (0)