@@ -5299,6 +5299,7 @@ def to_sql(
         name: str,
         con: Union[str, "sqlalchemy.engine.Connection", "sqlalchemy.engine.Engine", "sqlite3.Connection"],
         batch_size: Optional[int] = None,
+        num_proc: Optional[int] = None,
         **sql_writer_kwargs,
     ) -> int:
         """Exports the dataset to a SQL database.
@@ -5311,6 +5312,11 @@ def to_sql(
             batch_size (`int`, *optional*):
                 Size of the batch to load in memory and write at once.
                 Defaults to `datasets.config.DEFAULT_MAX_BATCH_SIZE`.
+            num_proc (`int`, *optional*):
+                Number of processes for multiprocessing. By default, it doesn't
+                use multiprocessing. In that case, `batch_size` defaults to
+                `datasets.config.DEFAULT_MAX_BATCH_SIZE`, but feel free to increase it
+                to 5x or 10x the default value if you have sufficient compute power.
             **sql_writer_kwargs (additional keyword arguments):
                 Parameters to pass to pandas's [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html).

@@ -5341,7 +5347,7 @@ def to_sql(
         # Dynamic import to avoid circular dependency
         from .io.sql import SqlDatasetWriter

-        return SqlDatasetWriter(self, name, con, batch_size=batch_size, **sql_writer_kwargs).write()
+        return SqlDatasetWriter(self, name, con, batch_size=batch_size, num_proc=num_proc, **sql_writer_kwargs).write()

     def _estimate_nbytes(self) -> int:
         dataset_nbytes = self.data.nbytes
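
A minimal usage sketch of the new parameter, assuming a build of `datasets` with this patch applied; the dataset contents, table names, and SQLite URI below are illustrative, not part of the diff:

```python
from datasets import Dataset

# Toy dataset for illustration; any Dataset instance works the same way.
ds = Dataset.from_dict({"id": [1, 2, 3], "text": ["a", "b", "c"]})

# Previous behavior (still the default): single-process write.
ds.to_sql("items", "sqlite:///example.db")

# New: spread batch writes across 4 worker processes. With multiprocessing
# enabled, a larger batch_size (e.g. 5-10x the default) can improve throughput.
# On a toy dataset this is overkill; num_proc pays off on large datasets.
ds.to_sql("items_parallel", "sqlite:///example.db", batch_size=10_000, num_proc=4)
```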