Skip to content
This repository has been archived by the owner on Jun 7, 2022. It is now read-only.

Add multi-filter to_dataframe method #64

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 62 additions & 17 deletions purpleair/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,27 +110,12 @@ def filter_column(self,
column: Optional[str],
value_filter: Union[str, int, float, None]) -> pd.DataFrame:
"""
Filter sensors by column and value_filter. If only column is passed, we
return rows that are not None. If the value_filter is passed, we only
return rows where the column matches that value.
Returns the output of filter_column_to_array as a pandas dataframe.
"""
# Check if there is no column passed
if column is None:
raise ValueError('No column name provided to filter on!')
out_l: List[dict] = []
for sensor in self.all_sensors:
sensor_data = sensor.as_flat_dict(channel)
if column not in sensor_data:
raise ValueError(
f'Requested column {column} does not exist in sensor data!')
result = sensor_data.get(column)
if value_filter and result != value_filter:
continue
if value_filter and result == value_filter:
out_l.append(sensor_data)
elif result is not None:
# If we do not want to filter the values, we filter out `None`s
out_l.append(sensor_data)
out_l = filter_column_to_array(self.all_sensors, channel, column, value_filter)

if len(out_l) == 0:
# pylint: disable=line-too-long
Expand Down Expand Up @@ -181,3 +166,63 @@ def to_dataframe(self,

sensor_data.index = sensor_data.pop('id')
return sensor_data

def to_dataframe_multi_filter(self,
                              channel: str,
                              sensor_filters: Optional[List[str]] = None,
                              column: Optional[str] = None,
                              value_filter: Union[str, int, float, None] = None) -> pd.DataFrame:
    """
    Returns a Pandas dataframe with filtered sensors, based on multiple filters
    provided by the user. If no filters are provided (or 'all' is one of them),
    we return all sensors in self.all_sensors.

    Filters are cumulative: a sensor must pass every requested filter.
    Recognised filter names are:

    - 'outside':  keep sensors whose location_type is 'outside'
    - 'useful':   keep sensors for which is_useful() is truthy
    - 'family':   keep sensors that have both a parent and a child
    - 'no_child': keep sensors without a child
    - 'column':   keep sensors according to `column` / `value_filter`
                  (see filter_column_to_array)

    Names are looked up with `in`, so unknown names are silently ignored.
    `column` and `value_filter` are only used when 'column' is requested.

    The returned dataframe is indexed by the sensors' 'id' field. If no sensor
    passes the filters, an empty dataframe is returned.

    Raises ValueError (from filter_column_to_array) when the 'column' filter
    is requested with a column that does not exist in the sensor data.
    """
    # `not sensor_filters` already covers both None and an empty collection.
    if not sensor_filters or 'all' in sensor_filters:
        rows = [s.as_flat_dict(channel) for s in self.all_sensors]
    else:
        relevant_sensors = self.all_sensors

        if 'outside' in sensor_filters:
            relevant_sensors = [s for s in relevant_sensors if s.location_type == 'outside']
        if 'useful' in sensor_filters:
            relevant_sensors = [s for s in relevant_sensors if s.is_useful()]
        if 'family' in sensor_filters:
            relevant_sensors = [s for s in relevant_sensors if s.parent and s.child]
        if 'no_child' in sensor_filters:
            relevant_sensors = [s for s in relevant_sensors if not s.child]

        if 'column' in sensor_filters:
            # The column filter flattens the sensors itself, so there is no
            # need to flatten them beforehand.
            rows = filter_column_to_array(
                relevant_sensors, channel, column, value_filter)
        else:
            rows = [s.as_flat_dict(channel) for s in relevant_sensors]

    sensor_data = pd.DataFrame(rows)

    # With zero rows the frame has no columns at all, so popping 'id' would
    # raise KeyError; in that case hand back the empty frame untouched.
    if 'id' in sensor_data.columns:
        sensor_data.index = sensor_data.pop('id')
    return sensor_data

def filter_column_to_array(
        sensors, channel: str, column: Optional[str],
        value_filter: Union[str, int, float, None]
) -> List[dict]:
    """
    Filter sensors by column and value_filter. If only column is passed, we
    return rows where that column is not None. If the value_filter is passed,
    we only return rows where the column equals that value.

    `sensors` is any iterable of objects exposing `as_flat_dict(channel)`;
    each returned row is the flat dict produced by that call.

    Only `None` means "no value filter": falsy values such as 0, 0.0 or ''
    are treated as real values to match against.

    Raises ValueError if no column is given, or if the requested column is
    missing from a sensor's flat dict.
    """
    if column is None:
        raise ValueError('No column name provided to filter on!')

    out_l: List[dict] = []
    for sensor in sensors:
        sensor_data = sensor.as_flat_dict(channel)
        if column not in sensor_data:
            raise ValueError(
                f'Requested column {column} does not exist in sensor data!')
        result = sensor_data[column]
        if value_filter is None:
            # If we do not want to filter the values, we filter out `None`s
            if result is not None:
                out_l.append(sensor_data)
        elif result == value_filter:
            out_l.append(sensor_data)
    return out_l
40 changes: 40 additions & 0 deletions tests/test_purpleair.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,46 @@ def test_to_dataframe_cols(self):
df_b = p.to_dataframe(sensor_filter='all', channel='child')
self.assertListEqual(list(df_a.columns), list(df_b.columns))

def test_to_dataframe_multi_filter_outside_family(self):
    """
    Smoke test: combining the 'outside' and 'family' filters builds a
    dataframe for the parent channel and then the child channel without
    raising.
    """
    sensor_list = network.SensorList()
    for channel_name in ('parent', 'child'):
        sensor_list.to_dataframe_multi_filter(
            sensor_filters=['outside', 'family'], channel=channel_name)

def test_to_dataframe_multi_filter_parity(self):
    """
    A single-element filter list passed to to_dataframe_multi_filter must
    give the same columns and the same number of rows as the equivalent
    to_dataframe call, for each of the 'all', 'outside', 'no_child' and
    'useful' filters on the parent channel.
    """
    sensor_list = network.SensorList()

    # Same order as the filters were originally exercised.
    for filter_name in ('all', 'outside', 'no_child', 'useful'):
        expected = sensor_list.to_dataframe(
            sensor_filter=filter_name, channel='parent')
        actual = sensor_list.to_dataframe_multi_filter(
            sensor_filters=[filter_name], channel='parent')
        self.assertListEqual(list(expected.columns), list(actual.columns))
        self.assertEqual(len(expected), len(actual))



class TestPurpleAirColumnFilters(unittest.TestCase):
"""
Expand Down