Skip to content

Commit

Permalink
Merge pull request #9541 from neo-technology/neo4j-values
Browse files Browse the repository at this point in the history
Reduce usage of Neo4j values
  • Loading branch information
jjaderberg authored Aug 27, 2024
2 parents e5d5ea1 + 7e6812f commit 19e3bca
Show file tree
Hide file tree
Showing 43 changed files with 701 additions and 290 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.eclipse.collections.api.block.function.primitive.LongToObjectFunction;
import org.neo4j.gds.api.Graph;
import org.neo4j.gds.api.nodeproperties.ValueType;
import org.neo4j.gds.api.properties.nodes.FilteredNodePropertyValuesMarker;
import org.neo4j.gds.api.properties.nodes.LongArrayNodePropertyValues;
import org.neo4j.gds.api.properties.nodes.LongNodePropertyValues;
import org.neo4j.gds.api.properties.nodes.NodeProperty;
Expand All @@ -30,8 +31,6 @@
import org.neo4j.gds.core.concurrency.Concurrency;
import org.neo4j.gds.core.concurrency.DefaultPool;
import org.neo4j.gds.result.CommunityStatistics;
import org.neo4j.values.storable.LongValue;
import org.neo4j.values.storable.Value;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -65,13 +64,13 @@ public static NodePropertyValues nodePropertyValues(
Supplier<NodeProperty> seedPropertySupplier
) {

if (consecutiveIds) {
return new ConsecutiveLongNodePropertyValues(nodeProperties);
}
if (isIncremental && resultProperty.equals(seedProperty)) {
nodeProperties = LongIfChangedNodePropertyValues.of(seedPropertySupplier.get(), nodeProperties);
}

if (consecutiveIds) {
return new ConsecutiveLongNodePropertyValues(nodeProperties);
}

return nodeProperties;
}
Expand Down Expand Up @@ -160,7 +159,7 @@ static List<List<Double>> arrayMatrixToListMatrix(boolean shouldCompute, double[
return null;
}

private static class CommunitySizeFilter implements LongNodePropertyValues {
private static class CommunitySizeFilter implements LongNodePropertyValues, FilteredNodePropertyValuesMarker {

private final LongNodePropertyValues properties;

Expand All @@ -183,31 +182,23 @@ public long nodeCount() {
return properties.nodeCount();
}

@Override
public long longValue(long nodeId) {
return properties.longValue(nodeId);
}

/**
* Returning null indicates that the value is not written to Neo4j.
* Returning Long.MIN_VALUE indicates that the value should not be written to Neo4j.
* <p>
* The filter is applied in the latest stage before writing to Neo4j.
* Since the wrapped node properties may have additional logic in value(),
* Since the wrapped node properties may have additional logic in longValue(),
* we need to check if they already filtered the value. Only in the case
* where the wrapped properties pass on the value, we can apply a filter.
*/
@Override
public Value value(long nodeId) {
var value = properties.value(nodeId);

if (value == null) {
return null;
public long longValue(long nodeId) {
var longValue = properties.longValue(nodeId);
// did the wrapped properties filter out the value?
if (longValue == Long.MIN_VALUE) {
return Long.MIN_VALUE;
}

// This cast is safe since we handle LongNodeProperties.
var communityId = ((LongValue) value).longValue();

return isCommunityMinSizeMet(communityId) ? value : null;
// apply our own filter
return isCommunityMinSizeMet(longValue) ? longValue : Long.MIN_VALUE;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
*/
package org.neo4j.gds.algorithms.community;

import org.neo4j.gds.api.properties.nodes.FilteredNodePropertyValuesMarker;
import org.neo4j.gds.api.properties.nodes.LongNodePropertyValues;
import org.neo4j.gds.collections.ha.HugeLongArray;
import org.neo4j.gds.core.utils.paged.HugeLongLongMap;
import org.neo4j.gds.mem.BitUtil;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.Values;

public class ConsecutiveLongNodePropertyValues implements LongNodePropertyValues {
public class ConsecutiveLongNodePropertyValues implements LongNodePropertyValues, FilteredNodePropertyValuesMarker {

private static final long MAPPING_SIZE_QUOTIENT = 10L;
private static final long NO_VALUE = -1L;
Expand Down Expand Up @@ -59,25 +58,28 @@ public ConsecutiveLongNodePropertyValues(LongNodePropertyValues inputProperties)
}
}

/**
* Returning Long.MIN_VALUE indicates that the value should not be written to Neo4j.
* <p>
* The filter is applied in the latest stage before writing to Neo4j.
* Since the wrapped node properties may have additional logic in longValue(),
* we need to check if they already filtered the value. Only in the case
* where the wrapped properties pass on the value, we can apply a filter.
*/
@Override
public long longValue(long nodeId) {
return communities.get(nodeId);
long l = communities.get(nodeId);
if (l == NO_VALUE) {
return Long.MIN_VALUE;
}
return l;
}

@Override
public boolean hasValue(long nodeId) {
return communities.get(nodeId) != NO_VALUE;
}

@Override
public Value value(long nodeId) {
if (hasValue(nodeId)) {
return Values.longValue(communities.get(nodeId));
}
return null;

}

@Override
public long nodeCount() {
return communities.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,14 @@

import org.neo4j.gds.api.PropertyState;
import org.neo4j.gds.api.nodeproperties.ValueType;
import org.neo4j.gds.api.properties.nodes.FilteredNodePropertyValuesMarker;
import org.neo4j.gds.api.properties.nodes.LongNodePropertyValues;
import org.neo4j.gds.api.properties.nodes.NodeProperty;
import org.neo4j.gds.api.properties.nodes.NodePropertyValues;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.Values;

import static org.neo4j.gds.utils.StringFormatting.formatWithLocale;

public final class LongIfChangedNodePropertyValues implements LongNodePropertyValues {
public final class LongIfChangedNodePropertyValues implements LongNodePropertyValues, FilteredNodePropertyValuesMarker {

private final NodePropertyValues seedProperties;
private final NodePropertyValues newProperties;
Expand Down Expand Up @@ -62,17 +61,20 @@ private LongIfChangedNodePropertyValues(
this.newProperties = newProperties;
}

/**
* Returning Long.MIN_VALUE indicates that the value should not be written to Neo4j.
* <p>
* The filter is applied in the latest stage before writing to Neo4j.
* Since the wrapped node properties may have additional logic in longValue(),
* we need to check if they already filtered the value. Only in the case
* where the wrapped properties pass on the value, we can apply a filter.
*/
@Override
public long longValue(long nodeId) {
return newProperties.longValue(nodeId);
}

@Override
public Value value(long nodeId) {
long seedValue = seedProperties.longValue(nodeId);
long writeValue = newProperties.longValue(nodeId);
var seedValue = seedProperties.longValue(nodeId);
var writeValue = newProperties.longValue(nodeId);

return seedValue == Long.MIN_VALUE || (seedValue != writeValue) ? Values.longValue(writeValue) : null;
return (seedValue != writeValue) ? writeValue : Long.MIN_VALUE;
}

@Override
Expand All @@ -84,7 +86,7 @@ public long nodeCount() {
public boolean hasValue(long nodeId) {
long seedValue = seedProperties.longValue(nodeId);
long writeValue = newProperties.longValue(nodeId);
return seedValue == Long.MIN_VALUE || (seedValue != writeValue) ? true : false;
return seedValue == Long.MIN_VALUE || (seedValue != writeValue);

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,11 @@
*/
package org.neo4j.gds.embeddings.hashgnn;

import org.neo4j.gds.api.nodeproperties.ValueType;
import org.neo4j.gds.api.properties.nodes.BinaryArrayNodePropertyValues;
import org.neo4j.gds.api.properties.nodes.DoubleArrayNodePropertyValues;
import org.neo4j.gds.api.properties.nodes.NodePropertyValues;
import org.neo4j.gds.core.utils.paged.HugeAtomicBitSet;
import org.neo4j.gds.collections.ha.HugeObjectArray;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.Values;

import java.util.Optional;
import org.neo4j.gds.core.utils.paged.HugeAtomicBitSet;

public final class EmbeddingsToNodePropertyValues {
private EmbeddingsToNodePropertyValues() {}
Expand All @@ -49,84 +45,4 @@ public long nodeCount() {
static NodePropertyValues fromBinary(HugeObjectArray<HugeAtomicBitSet> binaryEmbeddings, int embeddingDimension) {
return new BinaryArrayNodePropertyValues(binaryEmbeddings, embeddingDimension);
}

private static class BinaryArrayNodePropertyValues implements NodePropertyValues {

private final HugeObjectArray<HugeAtomicBitSet> binaryEmbeddings;
private final int embeddingDimension;

BinaryArrayNodePropertyValues(
HugeObjectArray<HugeAtomicBitSet> binaryEmbeddings,
int embeddingDimension
) {
this.binaryEmbeddings = binaryEmbeddings;
this.embeddingDimension = embeddingDimension;
}

@Override
public double[] doubleArrayValue(long nodeId) {
return bitSetToDoubleArray(binaryEmbeddings.get(nodeId), embeddingDimension);
}

@Override
public float[] floatArrayValue(long nodeId) {
return bitSetToFloatArray(binaryEmbeddings.get(nodeId), embeddingDimension);
}

@Override
public long[] longArrayValue(long nodeId) {
return bitSetToLongArray(binaryEmbeddings.get(nodeId), embeddingDimension);
}

@Override
public Object getObject(long nodeId) {
return bitSetToDoubleArray(binaryEmbeddings.get(nodeId), embeddingDimension);
}

@Override
public Value value(long nodeId) {
// as Boolean array is not an official property type in GDS we transform to double[].
// We use the same data type as in the dense case.
return Values.doubleArray(bitSetToDoubleArray(binaryEmbeddings.get(nodeId), embeddingDimension));
}

@Override
public Optional<Integer> dimension() {
return Optional.of(embeddingDimension);
}

@Override
public ValueType valueType() {
return ValueType.DOUBLE_ARRAY;
}

@Override
public long nodeCount() {
return binaryEmbeddings.size();
}

private static double[] bitSetToDoubleArray(HugeAtomicBitSet bitSet, int dimension) {
var array = new double[dimension];
bitSet.forEachSetBit(bit -> {
array[(int) bit] = 1.0;
});
return array;
}

private static float[] bitSetToFloatArray(HugeAtomicBitSet bitSet, int dimension) {
var array = new float[dimension];
bitSet.forEachSetBit(bit -> {
array[(int) bit] = 1.0f;
});
return array;
}

private static long[] bitSetToLongArray(HugeAtomicBitSet bitSet, int dimension) {
var array = new long[dimension];
bitSet.forEachSetBit(bit -> {
array[(int) bit] = 1;
});
return array;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.neo4j.gds.api.IdMap;
import org.neo4j.gds.api.nodeproperties.ValueType;
import org.neo4j.gds.api.properties.nodes.NodePropertyValues;
import org.neo4j.values.storable.Value;

import java.util.Optional;

Expand Down Expand Up @@ -85,13 +84,6 @@ public Optional<Integer> dimension() {
return properties.dimension();
}

@Override
public Value value(long nodeId) {
var value = properties.value(nodeId);
check(nodeId, value);
return value;
}

@Override
public long nodeCount() {
return properties.nodeCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ void shouldReturnOnlyChangedProperties() {
);

assertThat(result).isInstanceOf(LongIfChangedNodePropertyValues.class);
// properties that have not changed signalled by Long.MIN_VALUE
for (long i = 0; i < result.nodeCount(); i++) {
assertThat(result.longValue(i)).isEqualTo(inputProperties.longValue(i));
assertThat(result.value(i)).isNull();
assertThat(result.longValue(i)).isEqualTo(Long.MIN_VALUE);
}
}

Expand All @@ -123,11 +123,10 @@ void shouldRestrictCommunitySize() {
for (long i = 0L; i < result.nodeCount(); i++) {

if (i < 5) {
assertThat(result.longValue(i)).isEqualTo(inputProperties.longValue(i));
assertThat(result.value(i)).isNull();
// properties that have not changed signalled by Long.MIN_VALUE
assertThat(result.longValue(i)).isEqualTo(Long.MIN_VALUE);
} else {
assertThat(result.longValue(i)).isEqualTo(inputProperties.longValue(i));
assertThat(result.value(i).asObject()).isEqualTo(5L);
}
}
}
Expand All @@ -153,7 +152,6 @@ void shouldWorkWithMinComponentAndConsecutive() {
assertThat(result.hasValue(i)).isFalse();
} else {
assertThat(result.hasValue(i)).isTrue();
assertThat(result.value(i).asObject()).isEqualTo(returnedValues[(int) i]);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.neo4j.gds.core.utils.progress.TaskRegistryFactory;
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
import org.neo4j.gds.core.utils.progress.tasks.TaskProgressTracker;
import org.neo4j.gds.core.write.ImmutableNodeProperty;
import org.neo4j.gds.core.write.NodeProperty;
import org.neo4j.gds.core.write.NodePropertyExporter;
import org.neo4j.gds.core.write.NodePropertyExporterBuilder;
Expand Down Expand Up @@ -72,7 +71,7 @@ static NodePropertiesWritten writeNodeProperty(
TerminationFlag terminationFlag,
Log log
) {
var nodeProperties = List.of(ImmutableNodeProperty.of(writeProperty, nodePropertyValues));
var nodeProperties = List.of(new NodeProperty(writeProperty, nodePropertyValues));

var propertiesWritten = new MutableLong();

Expand Down Expand Up @@ -143,7 +142,7 @@ private static void validatePropertiesCanBeWritten(
var unexpectedProperties = nodeProperties
.stream()
.filter(nodeProperty -> {
var propertySchema = propertySchemas.get(nodeProperty.propertyKey());
var propertySchema = propertySchemas.get(nodeProperty.key());
if (propertySchema == null) {
// We are executing an algorithm write mode and the property we are writing is
// not in the GraphStore, therefore we do not perform any more checks
Expand All @@ -155,8 +154,8 @@ private static void validatePropertiesCanBeWritten(
.map(
nodeProperty -> formatWithLocale(
"NodeProperty{propertyKey=%s, propertyState=%s}",
nodeProperty.propertyKey(),
propertySchemas.get(nodeProperty.propertyKey()).state()
nodeProperty.key(),
propertySchemas.get(nodeProperty.key()).state()
)
)
.collect(Collectors.toList());
Expand Down
Loading

0 comments on commit 19e3bca

Please sign in to comment.