Spark SQL
This page gives an overview of all public Spark SQL APIs.
- Core Classes
- pyspark.sql.SparkSession
- pyspark.sql.Catalog
- pyspark.sql.DataFrame
- pyspark.sql.Column
- pyspark.sql.Observation
- pyspark.sql.Row
- pyspark.sql.GroupedData
- pyspark.sql.PandasCogroupedOps
- pyspark.sql.DataFrameNaFunctions
- pyspark.sql.DataFrameStatFunctions
- pyspark.sql.Window
- pyspark.sql.DataFrameReader
- pyspark.sql.DataFrameWriter
- pyspark.sql.DataFrameWriterV2
- pyspark.sql.UDFRegistration
- pyspark.sql.UDTFRegistration
- pyspark.sql.udf.UserDefinedFunction
- pyspark.sql.udtf.UserDefinedTableFunction
- pyspark.sql.datasource.DataSource
- pyspark.sql.datasource.DataSourceReader
- pyspark.sql.datasource.DataSourceStreamReader
- pyspark.sql.datasource.DataSourceWriter
- pyspark.sql.datasource.DataSourceRegistration
- pyspark.sql.datasource.InputPartition
- pyspark.sql.datasource.WriterCommitMessage
- pyspark.sql.VariantVal
- Spark Session
- pyspark.sql.SparkSession.active
- pyspark.sql.SparkSession.builder.appName
- pyspark.sql.SparkSession.builder.config
- pyspark.sql.SparkSession.builder.enableHiveSupport
- pyspark.sql.SparkSession.builder.getOrCreate
- pyspark.sql.SparkSession.builder.master
- pyspark.sql.SparkSession.builder.remote
- pyspark.sql.SparkSession.catalog
- pyspark.sql.SparkSession.conf
- pyspark.sql.SparkSession.createDataFrame
- pyspark.sql.SparkSession.dataSource
- pyspark.sql.SparkSession.getActiveSession
- pyspark.sql.SparkSession.newSession
- pyspark.sql.SparkSession.profile
- pyspark.sql.SparkSession.range
- pyspark.sql.SparkSession.read
- pyspark.sql.SparkSession.readStream
- pyspark.sql.SparkSession.sparkContext
- pyspark.sql.SparkSession.sql
- pyspark.sql.SparkSession.stop
- pyspark.sql.SparkSession.streams
- pyspark.sql.SparkSession.table
- pyspark.sql.SparkSession.tvf
- pyspark.sql.SparkSession.udf
- pyspark.sql.SparkSession.udtf
- pyspark.sql.SparkSession.version
- pyspark.sql.is_remote
- Spark Connect Only
- Configuration
- Input/Output
- pyspark.sql.DataFrameReader.csv
- pyspark.sql.DataFrameReader.format
- pyspark.sql.DataFrameReader.jdbc
- pyspark.sql.DataFrameReader.json
- pyspark.sql.DataFrameReader.load
- pyspark.sql.DataFrameReader.option
- pyspark.sql.DataFrameReader.options
- pyspark.sql.DataFrameReader.orc
- pyspark.sql.DataFrameReader.parquet
- pyspark.sql.DataFrameReader.schema
- pyspark.sql.DataFrameReader.table
- pyspark.sql.DataFrameReader.text
- pyspark.sql.DataFrameWriter.bucketBy
- pyspark.sql.DataFrameWriter.csv
- pyspark.sql.DataFrameWriter.format
- pyspark.sql.DataFrameWriter.insertInto
- pyspark.sql.DataFrameWriter.jdbc
- pyspark.sql.DataFrameWriter.json
- pyspark.sql.DataFrameWriter.mode
- pyspark.sql.DataFrameWriter.option
- pyspark.sql.DataFrameWriter.options
- pyspark.sql.DataFrameWriter.orc
- pyspark.sql.DataFrameWriter.parquet
- pyspark.sql.DataFrameWriter.partitionBy
- pyspark.sql.DataFrameWriter.save
- pyspark.sql.DataFrameWriter.saveAsTable
- pyspark.sql.DataFrameWriter.sortBy
- pyspark.sql.DataFrameWriter.text
- pyspark.sql.DataFrameWriterV2.using
- pyspark.sql.DataFrameWriterV2.option
- pyspark.sql.DataFrameWriterV2.options
- pyspark.sql.DataFrameWriterV2.tableProperty
- pyspark.sql.DataFrameWriterV2.partitionedBy
- pyspark.sql.DataFrameWriterV2.create
- pyspark.sql.DataFrameWriterV2.replace
- pyspark.sql.DataFrameWriterV2.createOrReplace
- pyspark.sql.DataFrameWriterV2.append
- pyspark.sql.DataFrameWriterV2.overwrite
- pyspark.sql.DataFrameWriterV2.overwritePartitions
- pyspark.sql.MergeIntoWriter.whenMatched
- pyspark.sql.MergeIntoWriter.whenNotMatched
- pyspark.sql.MergeIntoWriter.whenNotMatchedBySource
- pyspark.sql.MergeIntoWriter.withSchemaEvolution
- pyspark.sql.MergeIntoWriter.merge
- DataFrame
- pyspark.sql.DataFrame.__getattr__
- pyspark.sql.DataFrame.__getitem__
- pyspark.sql.DataFrame.agg
- pyspark.sql.DataFrame.alias
- pyspark.sql.DataFrame.approxQuantile
- pyspark.sql.DataFrame.cache
- pyspark.sql.DataFrame.checkpoint
- pyspark.sql.DataFrame.coalesce
- pyspark.sql.DataFrame.colRegex
- pyspark.sql.DataFrame.collect
- pyspark.sql.DataFrame.columns
- pyspark.sql.DataFrame.corr
- pyspark.sql.DataFrame.count
- pyspark.sql.DataFrame.cov
- pyspark.sql.DataFrame.createGlobalTempView
- pyspark.sql.DataFrame.createOrReplaceGlobalTempView
- pyspark.sql.DataFrame.createOrReplaceTempView
- pyspark.sql.DataFrame.createTempView
- pyspark.sql.DataFrame.crossJoin
- pyspark.sql.DataFrame.crosstab
- pyspark.sql.DataFrame.cube
- pyspark.sql.DataFrame.describe
- pyspark.sql.DataFrame.distinct
- pyspark.sql.DataFrame.drop
- pyspark.sql.DataFrame.dropDuplicates
- pyspark.sql.DataFrame.dropDuplicatesWithinWatermark
- pyspark.sql.DataFrame.drop_duplicates
- pyspark.sql.DataFrame.dropna
- pyspark.sql.DataFrame.dtypes
- pyspark.sql.DataFrame.exceptAll
- pyspark.sql.DataFrame.executionInfo
- pyspark.sql.DataFrame.explain
- pyspark.sql.DataFrame.fillna
- pyspark.sql.DataFrame.filter
- pyspark.sql.DataFrame.first
- pyspark.sql.DataFrame.foreach
- pyspark.sql.DataFrame.foreachPartition
- pyspark.sql.DataFrame.freqItems
- pyspark.sql.DataFrame.groupBy
- pyspark.sql.DataFrame.groupingSets
- pyspark.sql.DataFrame.head
- pyspark.sql.DataFrame.hint
- pyspark.sql.DataFrame.inputFiles
- pyspark.sql.DataFrame.intersect
- pyspark.sql.DataFrame.intersectAll
- pyspark.sql.DataFrame.isEmpty
- pyspark.sql.DataFrame.isLocal
- pyspark.sql.DataFrame.isStreaming
- pyspark.sql.DataFrame.join
- pyspark.sql.DataFrame.limit
- pyspark.sql.DataFrame.localCheckpoint
- pyspark.sql.DataFrame.mapInPandas
- pyspark.sql.DataFrame.mapInArrow
- pyspark.sql.DataFrame.melt
- pyspark.sql.DataFrame.na
- pyspark.sql.DataFrame.observe
- pyspark.sql.DataFrame.offset
- pyspark.sql.DataFrame.orderBy
- pyspark.sql.DataFrame.persist
- pyspark.sql.DataFrame.printSchema
- pyspark.sql.DataFrame.randomSplit
- pyspark.sql.DataFrame.rdd
- pyspark.sql.DataFrame.registerTempTable
- pyspark.sql.DataFrame.repartition
- pyspark.sql.DataFrame.repartitionByRange
- pyspark.sql.DataFrame.replace
- pyspark.sql.DataFrame.rollup
- pyspark.sql.DataFrame.sameSemantics
- pyspark.sql.DataFrame.sample
- pyspark.sql.DataFrame.sampleBy
- pyspark.sql.DataFrame.schema
- pyspark.sql.DataFrame.select
- pyspark.sql.DataFrame.selectExpr
- pyspark.sql.DataFrame.semanticHash
- pyspark.sql.DataFrame.show
- pyspark.sql.DataFrame.sort
- pyspark.sql.DataFrame.sortWithinPartitions
- pyspark.sql.DataFrame.sparkSession
- pyspark.sql.DataFrame.stat
- pyspark.sql.DataFrame.storageLevel
- pyspark.sql.DataFrame.subtract
- pyspark.sql.DataFrame.summary
- pyspark.sql.DataFrame.tail
- pyspark.sql.DataFrame.take
- pyspark.sql.DataFrame.to
- pyspark.sql.DataFrame.toArrow
- pyspark.sql.DataFrame.toDF
- pyspark.sql.DataFrame.toJSON
- pyspark.sql.DataFrame.toLocalIterator
- pyspark.sql.DataFrame.toPandas
- pyspark.sql.DataFrame.transform
- pyspark.sql.DataFrame.union
- pyspark.sql.DataFrame.unionAll
- pyspark.sql.DataFrame.unionByName
- pyspark.sql.DataFrame.unpersist
- pyspark.sql.DataFrame.unpivot
- pyspark.sql.DataFrame.where
- pyspark.sql.DataFrame.withColumn
- pyspark.sql.DataFrame.withColumns
- pyspark.sql.DataFrame.withColumnRenamed
- pyspark.sql.DataFrame.withColumnsRenamed
- pyspark.sql.DataFrame.withMetadata
- pyspark.sql.DataFrame.withWatermark
- pyspark.sql.DataFrame.write
- pyspark.sql.DataFrame.writeStream
- pyspark.sql.DataFrame.writeTo
- pyspark.sql.DataFrame.mergeInto
- pyspark.sql.DataFrame.pandas_api
- pyspark.sql.DataFrameNaFunctions.drop
- pyspark.sql.DataFrameNaFunctions.fill
- pyspark.sql.DataFrameNaFunctions.replace
- pyspark.sql.DataFrameStatFunctions.approxQuantile
- pyspark.sql.DataFrameStatFunctions.corr
- pyspark.sql.DataFrameStatFunctions.cov
- pyspark.sql.DataFrameStatFunctions.crosstab
- pyspark.sql.DataFrameStatFunctions.freqItems
- pyspark.sql.DataFrameStatFunctions.sampleBy
- Column
- pyspark.sql.Column.__getattr__
- pyspark.sql.Column.__getitem__
- pyspark.sql.Column.alias
- pyspark.sql.Column.asc
- pyspark.sql.Column.asc_nulls_first
- pyspark.sql.Column.asc_nulls_last
- pyspark.sql.Column.astype
- pyspark.sql.Column.between
- pyspark.sql.Column.bitwiseAND
- pyspark.sql.Column.bitwiseOR
- pyspark.sql.Column.bitwiseXOR
- pyspark.sql.Column.cast
- pyspark.sql.Column.contains
- pyspark.sql.Column.desc
- pyspark.sql.Column.desc_nulls_first
- pyspark.sql.Column.desc_nulls_last
- pyspark.sql.Column.dropFields
- pyspark.sql.Column.endswith
- pyspark.sql.Column.eqNullSafe
- pyspark.sql.Column.getField
- pyspark.sql.Column.getItem
- pyspark.sql.Column.ilike
- pyspark.sql.Column.isNaN
- pyspark.sql.Column.isNotNull
- pyspark.sql.Column.isNull
- pyspark.sql.Column.isin
- pyspark.sql.Column.like
- pyspark.sql.Column.name
- pyspark.sql.Column.otherwise
- pyspark.sql.Column.over
- pyspark.sql.Column.rlike
- pyspark.sql.Column.startswith
- pyspark.sql.Column.substr
- pyspark.sql.Column.try_cast
- pyspark.sql.Column.when
- pyspark.sql.Column.withField
- Data Types
- ArrayType
- BinaryType
- BooleanType
- ByteType
- DataType
- DateType
- DecimalType
- DoubleType
- FloatType
- IntegerType
- LongType
- MapType
- NullType
- ShortType
- StringType
- CharType
- VarcharType
- StructField
- StructType
- VariantType
- TimestampType
- TimestampNTZType
- DayTimeIntervalType
- YearMonthIntervalType
- CalendarIntervalType
- Row
- Functions
- Normal Functions
- Conditional Functions
- Predicate Functions
- Sort Functions
- Mathematical Functions
- String Functions
- Bitwise Functions
- Date and Timestamp Functions
- Hash Functions
- Collection Functions
- Array Functions
- Struct Functions
- Map Functions
- Aggregate Functions
- Window Functions
- Generator Functions
- Partition Transformation Functions
- CSV Functions
- JSON Functions
- VARIANT Functions
- XML Functions
- URL Functions
- Misc Functions
- UDF, UDTF and UDT
- Window
- pyspark.sql.Window.currentRow
- pyspark.sql.Window.orderBy
- pyspark.sql.Window.partitionBy
- pyspark.sql.Window.rangeBetween
- pyspark.sql.Window.rowsBetween
- pyspark.sql.Window.unboundedFollowing
- pyspark.sql.Window.unboundedPreceding
- pyspark.sql.WindowSpec.orderBy
- pyspark.sql.WindowSpec.partitionBy
- pyspark.sql.WindowSpec.rangeBetween
- pyspark.sql.WindowSpec.rowsBetween
- Grouping
- pyspark.sql.GroupedData.agg
- pyspark.sql.GroupedData.apply
- pyspark.sql.GroupedData.applyInArrow
- pyspark.sql.GroupedData.applyInPandas
- pyspark.sql.GroupedData.applyInPandasWithState
- pyspark.sql.GroupedData.avg
- pyspark.sql.GroupedData.cogroup
- pyspark.sql.GroupedData.count
- pyspark.sql.GroupedData.max
- pyspark.sql.GroupedData.mean
- pyspark.sql.GroupedData.min
- pyspark.sql.GroupedData.pivot
- pyspark.sql.GroupedData.sum
- pyspark.sql.PandasCogroupedOps.applyInArrow
- pyspark.sql.PandasCogroupedOps.applyInPandas
- Catalog
- pyspark.sql.Catalog.cacheTable
- pyspark.sql.Catalog.clearCache
- pyspark.sql.Catalog.createExternalTable
- pyspark.sql.Catalog.createTable
- pyspark.sql.Catalog.currentCatalog
- pyspark.sql.Catalog.currentDatabase
- pyspark.sql.Catalog.databaseExists
- pyspark.sql.Catalog.dropGlobalTempView
- pyspark.sql.Catalog.dropTempView
- pyspark.sql.Catalog.functionExists
- pyspark.sql.Catalog.getDatabase
- pyspark.sql.Catalog.getFunction
- pyspark.sql.Catalog.getTable
- pyspark.sql.Catalog.isCached
- pyspark.sql.Catalog.listCatalogs
- pyspark.sql.Catalog.listColumns
- pyspark.sql.Catalog.listDatabases
- pyspark.sql.Catalog.listFunctions
- pyspark.sql.Catalog.listTables
- pyspark.sql.Catalog.recoverPartitions
- pyspark.sql.Catalog.refreshByPath
- pyspark.sql.Catalog.refreshTable
- pyspark.sql.Catalog.registerFunction
- pyspark.sql.Catalog.setCurrentCatalog
- pyspark.sql.Catalog.setCurrentDatabase
- pyspark.sql.Catalog.tableExists
- pyspark.sql.Catalog.uncacheTable
- Avro
- Observation
- UDF
- UDTF
- VariantVal
- Protobuf
- Python Data Source
- pyspark.sql.datasource.DataSource.name
- pyspark.sql.datasource.DataSource.reader
- pyspark.sql.datasource.DataSource.schema
- pyspark.sql.datasource.DataSource.streamReader
- pyspark.sql.datasource.DataSource.writer
- pyspark.sql.datasource.DataSourceReader.partitions
- pyspark.sql.datasource.DataSourceReader.read
- pyspark.sql.datasource.DataSourceRegistration.register
- pyspark.sql.datasource.DataSourceStreamReader.commit
- pyspark.sql.datasource.DataSourceStreamReader.initialOffset
- pyspark.sql.datasource.DataSourceStreamReader.latestOffset
- pyspark.sql.datasource.DataSourceStreamReader.partitions
- pyspark.sql.datasource.DataSourceStreamReader.read
- pyspark.sql.datasource.DataSourceStreamReader.stop
- pyspark.sql.datasource.DataSourceWriter.abort
- pyspark.sql.datasource.DataSourceWriter.commit
- pyspark.sql.datasource.DataSourceWriter.write
- Stateful Processor