Skip to content

Commit be8d88f

Browse files
NivekT authored and pytorchmergebot committed
[DataLoader] Removing DataLoader2 related code (pytorch#88848)
Removing these lines of code as `DataLoader2` has been added to [TorchData](https://github.com/pytorch/data). I'm importing this to confirm it will not impact internal code. Differential Revision: [D41201578](https://our.internmc.facebook.com/intern/diff/D41201578). Pull Request resolved: pytorch#88848. Approved by: https://github.com/ejguan
1 parent f39cad5 commit be8d88f

File tree

11 files changed

+0
-1016
lines changed

11 files changed

+0
-1016
lines changed

docs/source/data.rst

-3
Original file line numberDiff line numberDiff line change
@@ -441,9 +441,6 @@ Example::
441441
.. autoclass:: torch.utils.data.distributed.DistributedSampler
442442

443443

444-
.. This module is experimental and should be private, adding it here for now
445-
.. py:module:: torch.utils.data.communication
446-
447444
.. These modules are documented as part of torch/data listing them here for
448445
.. now until we have a clearer fix
449446
.. py:module:: torch.utils.data.datapipes

test/test_dataloader.py

-111
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,16 @@
2020
ChainDataset,
2121
ConcatDataset,
2222
DataLoader,
23-
DataLoader2,
2423
Dataset,
2524
IterableDataset,
2625
IterDataPipe,
2726
Subset,
2827
TensorDataset,
29-
communication,
3028
_utils
3129
)
3230
from torch.utils.data._utils import MP_STATUS_CHECK_INTERVAL
3331
from torch.utils.data.dataset import random_split
3432
from torch.utils.data.datapipes.iter import IterableWrapper
35-
from torch.utils.data.datapipes.map import SequenceWrapper
3633
from torch._utils import ExceptionWrapper
3734
from torch.testing._internal.common_utils import (TestCase, run_tests, TEST_NUMPY, IS_WINDOWS,
3835
IS_CI, NO_MULTIPROCESSING_SPAWN, skipIfRocm, slowTest,
@@ -2222,114 +2219,6 @@ def test_excessive_thread_creation_warning(self):
22222219
r"excessive worker creation might get DataLoader running slow or even freeze"):
22232220
dataloader = DataLoader(self.dataset, batch_size=2, num_workers=1000)
22242221

2225-
# Define a global function for testing purposes since local functions cannot be pickled
2226-
def identity(x):
2227-
return x
2228-
2229-
@unittest.skipIf(
2230-
TEST_WITH_TSAN,
2231-
"Fails with TSAN with the following error: starting new threads after multi-threaded "
2232-
"fork is not supported. Dying (set die_after_fork=0 to override)")
2233-
class TestDataLoader2(TestCase):
2234-
@skipIfNoDill
2235-
def test_basics(self):
2236-
# TODO(VitalyFedyunin): This test will start breaking if we remove guaranteed order
2237-
# of traversing workers
2238-
dp = IterableWrapper(list(range(1000))).sharding_filter()
2239-
dl = DataLoader(dp, batch_size=3, collate_fn=identity, num_workers=2)
2240-
dl2 = DataLoader2(dp, batch_size=3, collate_fn=identity, num_workers=2)
2241-
dl2_threading = DataLoader2(dp, batch_size=3, collate_fn=identity, num_workers=2, parallelism_mode='thread')
2242-
self.assertEqual(list(dl), list(dl2))
2243-
self.assertEqual(list(dl), list(dl2_threading))
2244-
2245-
class Sorter(IterDataPipe):
2246-
def __init__(self, datapipe):
2247-
self.datapipe = datapipe
2248-
2249-
def __iter__(self):
2250-
return iter(sorted(self.datapipe))
2251-
2252-
def test_shuffle(self):
2253-
items = list(range(1000))
2254-
dp = IterableWrapper(items).sharding_filter().shuffle()
2255-
2256-
dl = DataLoader2(dp, batch_size=None, num_workers=2, shuffle=False)
2257-
self.assertEqual(items, list(dl))
2258-
2259-
dl = DataLoader2(dp, batch_size=None, num_workers=2, shuffle=True)
2260-
self.assertNotEqual(items, list(dl))
2261-
self.assertEqual(items, sorted(list(dl)))
2262-
2263-
dl = DataLoader2(dp, batch_size=None, num_workers=2, shuffle=True)
2264-
self.assertNotEqual(items, list(dl))
2265-
self.assertEqual(items, sorted(list(dl)))
2266-
2267-
dl = DataLoader2(self.Sorter(dp), batch_size=None, num_workers=2, shuffle=True)
2268-
self.assertEqual(list(dl), items)
2269-
2270-
dl = DataLoader2(self.Sorter(dp), batch_size=None, num_workers=2, shuffle=True)
2271-
self.assertEqual(list(dl), items)
2272-
2273-
2274-
@unittest.skipIf(
2275-
TEST_WITH_TSAN,
2276-
"Fails with TSAN with the following error: starting new threads after multi-threaded "
2277-
"fork is not supported. Dying (set die_after_fork=0 to override)")
2278-
class TestDataLoader2_EventLoop(TestCase):
2279-
@skipIfNoDill
2280-
def test_basic_threading(self):
2281-
def clean_me(process, req_queue, res_queue):
2282-
req_queue.put(communication.messages.TerminateRequest())
2283-
_ = res_queue.get()
2284-
process.join()
2285-
2286-
it = list(range(100))
2287-
numbers_dp = IterableWrapper(it)
2288-
(process, req_queue, res_queue, _thread_local_datapipe) = communication.eventloop.SpawnThreadForDataPipeline(numbers_dp)
2289-
2290-
process.start()
2291-
local_datapipe = communication.iter.QueueWrapper(
2292-
communication.protocol.IterDataPipeQueueProtocolClient(req_queue, res_queue))
2293-
2294-
actual = list(local_datapipe)
2295-
clean_me(process, req_queue, res_queue)
2296-
2297-
self.assertEqual(list(range(100)), actual)
2298-
2299-
@skipIfNoDill
2300-
def test_basic_mapdatapipe_threading(self):
2301-
def clean_me(process, req_queue, res_queue):
2302-
req_queue.put(communication.messages.TerminateRequest())
2303-
_ = res_queue.get()
2304-
process.join()
2305-
2306-
input_len = 100
2307-
it = list(range(input_len))
2308-
numbers_dp = SequenceWrapper(it)
2309-
(process, req_queue, res_queue, _thread_local_datapipe) = communication.eventloop.SpawnThreadForDataPipeline(
2310-
numbers_dp)
2311-
2312-
process.start()
2313-
2314-
# Functional Test: Ensure that you can retrieve every element from the Queue and DataPipe
2315-
local_datapipe = communication.map.QueueWrapperForMap(
2316-
communication.protocol.MapDataPipeQueueProtocolClient(req_queue, res_queue))
2317-
actual = list(local_datapipe)
2318-
self.assertEqual([(x, x) for x in range(100)], actual)
2319-
2320-
# Functional Test: raise Error when input
2321-
local_datapipe = communication.map.QueueWrapperForMap(
2322-
communication.protocol.MapDataPipeQueueProtocolClient(req_queue, res_queue))
2323-
with self.assertRaisesRegex(IndexError, "out of bound"):
2324-
local_datapipe[1000]
2325-
2326-
# __len__ Test: Ensure that the correct length is returned
2327-
local_datapipe = communication.map.QueueWrapperForMap(
2328-
communication.protocol.MapDataPipeQueueProtocolClient(req_queue, res_queue))
2329-
self.assertEqual(input_len, len(local_datapipe))
2330-
2331-
clean_me(process, req_queue, res_queue)
2332-
23332222

23342223
class IntegrationTestDataLoaderDataPipe(TestCase):
23352224
r"""

torch/utils/data/__init__.py

-5
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,13 @@
3939
runtime_validation,
4040
runtime_validation_disabled,
4141
)
42-
from torch.utils.data.dataloader_experimental import DataLoader2
43-
from torch.utils.data import communication
4442

4543
__all__ = ['BatchSampler',
4644
'ChainDataset',
4745
'ConcatDataset',
4846
'DFIterDataPipe',
4947
'DataChunk',
5048
'DataLoader',
51-
'DataLoader2',
5249
'Dataset',
5350
'DistributedSampler',
5451
'IterDataPipe',
@@ -63,8 +60,6 @@
6360
'WeightedRandomSampler',
6461
'_DatasetKind',
6562
'argument_validation',
66-
'collate',
67-
'communication',
6863
'default_collate',
6964
'default_convert',
7065
'functional_datapipe',

torch/utils/data/communication/__init__.py

-6
This file was deleted.

torch/utils/data/communication/eventloop.py

-70
This file was deleted.

0 commit comments

Comments (0)