T |
Transpose DataFrame - swap rows and columns |
df.T |
abs |
Return absolute value of each element |
df.abs() |
add |
Addition (element-wise, with another DataFrame, Series, or scalar) |
df.add(other) |
add_prefix |
Add prefix to column labels |
df.add_prefix('col_') |
add_suffix |
Add suffix to column labels |
df.add_suffix('_2024') |
agg |
Aggregate using one or more operations |
df.agg(['sum', 'mean']) |
aggregate |
Aggregate using one or more operations |
df.aggregate(['sum', 'min']) |
align |
Align two DataFrames on axes |
df1.align(df2) |
all |
Return whether all elements are True |
df.all(axis=1) |
any |
Return whether any element is True |
df.any(axis=0) |
append |
Append rows of other DataFrame (removed in pandas 2.0; use pd.concat) |
df.append(other) |
apply |
Apply function along axis |
df.apply(lambda x: x*2) |
applymap |
Apply function element-wise (deprecated since pandas 2.1; use DataFrame.map) |
df.applymap(str.upper) |
asfreq |
Convert time series to specified frequency |
df.asfreq('D') |
asof |
Return last row(s) without NaN at or before the given label |
df.asof('2024-01-01') |
assign |
Assign new columns to DataFrame |
df.assign(new_col=df['a']*2) |
astype |
Cast object to dtype |
df.astype('float32') |
at |
Access single value by label |
df.at[0, 'column'] |
at_time |
Select values at particular time |
df.at_time('09:30') |
attrs |
Dictionary of global attributes |
df.attrs['description'] |
axes |
Return list of axis labels |
df.axes |
backfill |
Backward fill NaN values |
df.backfill() |
between_time |
Select values between times |
df.between_time('09:00', '17:00') |
bfill |
Backward fill NaN values |
df.bfill() |
bool |
Return bool of single element |
df.bool() |
boxplot |
Make box plot from DataFrame |
df.boxplot() |
clip |
Trim values at thresholds |
df.clip(lower=0, upper=100) |
columns |
Column labels of DataFrame |
df.columns |
combine |
Combine DataFrames column-wise using a function |
df1.combine(df2, np.minimum) |
combine_first |
Combine with other DataFrame, filling nulls |
df1.combine_first(df2) |
compare |
Compare DataFrames and show differences |
df1.compare(df2) |
convert_dtypes |
Convert columns to best possible dtypes |
df.convert_dtypes() |
copy |
Copy DataFrame |
df.copy() |
corr |
Compute pairwise correlation |
df.corr() |
corrwith |
Compute pairwise correlation with another |
df.corrwith(other) |
count |
Count non-NA cells |
df.count() |
cov |
Compute covariance |
df.cov() |
cummax |
Cumulative maximum |
df.cummax() |
cummin |
Cumulative minimum |
df.cummin() |
cumprod |
Cumulative product |
df.cumprod() |
cumsum |
Cumulative sum |
df.cumsum() |
describe |
Generate descriptive statistics |
df.describe() |
diff |
First discrete difference |
df.diff() |
div |
Floating division |
df.div(other) |
divide |
Floating division |
df.divide(other) |
dot |
Matrix multiplication |
df.dot(other) |
drop |
Drop specified labels |
df.drop(['col1'], axis=1) |
drop_duplicates |
Drop duplicate rows |
df.drop_duplicates() |
droplevel |
Drop levels from MultiIndex |
df.droplevel(1) |
dropna |
Drop missing values |
df.dropna() |
dtypes |
Return dtypes of columns |
df.dtypes |
duplicated |
Return boolean Series for duplicates |
df.duplicated() |
empty |
Check if DataFrame is empty |
df.empty |
eq |
Equal comparison |
df.eq(other) |
equals |
Check if two DataFrames are equal |
df.equals(other) |
eval |
Evaluate string expression |
df.eval('A + B') |
ewm |
Exponential weighted functions |
df.ewm(span=3).mean() |
expanding |
Expanding window functions |
df.expanding().mean() |
explode |
Transform list-like to rows |
df.explode('col') |
ffill |
Forward fill NaN values |
df.ffill() |
fillna |
Fill NaN values |
df.fillna(0) |
filter |
Filter DataFrame by labels |
df.filter(items=['A', 'B']) |
first |
Select first periods |
df.first('5D') |
first_valid_index |
Return index of first non-NA value |
df.first_valid_index() |
flags |
Get flags of DataFrame |
df.flags |
floordiv |
Integer division |
df.floordiv(other) |
from_dict |
Create DataFrame from dict |
pd.DataFrame.from_dict(data) |
from_records |
Create DataFrame from records |
pd.DataFrame.from_records(records) |
ge |
Greater than or equal comparison |
df.ge(other) |
get |
Get item from object |
df.get('column', default=0) |
groupby |
Group DataFrame by mapping |
df.groupby('col').mean() |
gt |
Greater than comparison |
df.gt(other) |
head |
Return first n rows |
df.head(10) |
hist |
Plot histogram |
df.hist() |
iat |
Access single value by position |
df.iat[0, 1] |
idxmax |
Return index of maximum |
df.idxmax() |
idxmin |
Return index of minimum |
df.idxmin() |
iloc |
Purely integer-location indexing |
df.iloc[0:5, 0:2] |
index |
Index of DataFrame |
df.index |
infer_objects |
Infer better dtypes |
df.infer_objects() |
info |
Print concise summary |
df.info() |
insert |
Insert column at location |
df.insert(1, 'new', values) |
interpolate |
Interpolate NaN values |
df.interpolate() |
isetitem |
Set item by position |
df.isetitem(0, values) |
isin |
Check whether each element is contained in the given values |
df.isin([1, 2, 3]) |
isna |
Detect missing values |
df.isna() |
isnull |
Detect missing values |
df.isnull() |
items |
Iterate over columns |
for label, content in df.items() |
iterrows |
Iterate over DataFrame rows |
for index, row in df.iterrows() |
itertuples |
Iterate as namedtuples |
for row in df.itertuples() |
join |
Join columns with other DataFrame |
df.join(other, on='key') |
keys |
Get axis labels |
df.keys() |
kurt |
Unbiased kurtosis |
df.kurt() |
kurtosis |
Unbiased kurtosis |
df.kurtosis() |
last |
Select last periods |
df.last('5D') |
last_valid_index |
Index of last non-NA value |
df.last_valid_index() |
le |
Less than or equal comparison |
df.le(other) |
loc |
Label-location indexing |
df.loc[:, 'A':'C'] |
lt |
Less than comparison |
df.lt(other) |
map |
Apply function element-wise (DataFrame.map, pandas 2.1+) |
df.map(func) |
mask |
Replace values where condition is True |
df.mask(df < 0, 0) |
max |
Maximum of values |
df.max() |
mean |
Mean of values |
df.mean() |
median |
Median of values |
df.median() |
melt |
Unpivot DataFrame |
df.melt(id_vars=['id']) |
memory_usage |
Memory usage of DataFrame |
df.memory_usage() |
merge |
Merge DataFrames |
df.merge(df2, on='key') |
min |
Minimum of values |
df.min() |
mod |
Modulo |
df.mod(other) |
mode |
Mode of values |
df.mode() |
mul |
Multiplication |
df.mul(other) |
multiply |
Multiplication |
df.multiply(other) |
ndim |
Number of dimensions |
df.ndim |
ne |
Not equal comparison |
df.ne(other) |
nlargest |
Return n largest values |
df.nlargest(5, 'col') |
notna |
Detect non-missing values |
df.notna() |
notnull |
Detect non-missing values |
df.notnull() |
nsmallest |
Return n smallest values |
df.nsmallest(5, 'col') |
nunique |
Count unique values |
df.nunique() |
pad |
Forward fill NaN values |
df.pad() |
pct_change |
Percentage change |
df.pct_change() |
pipe |
Apply chain of functions |
df.pipe(func1).pipe(func2) |
pivot |
Reshape data using pivot |
df.pivot(index='date', columns='type') |
pivot_table |
Create pivot table |
df.pivot_table(values='value', index='date') |
plot |
Plot DataFrame |
df.plot() |
pop |
Pop column and return |
df.pop('col') |
pow |
Exponentiation |
df.pow(other) |
prod |
Product of values |
df.prod() |
product |
Product of values |
df.product() |
quantile |
Return quantile |
df.quantile(0.5) |
query |
Query DataFrame with expression |
df.query('col > 5') |
radd |
Reverse addition |
df.radd(other) |
rank |
Rank of values |
df.rank() |
rdiv |
Reverse division |
df.rdiv(other) |
reindex |
Conform DataFrame to new index |
df.reindex(new_index) |
reindex_like |
Reindex to match other DataFrame |
df.reindex_like(other) |
rename |
Rename columns/index |
df.rename(columns={'old': 'new'}) |
rename_axis |
Set axis name |
df.rename_axis('date') |
reorder_levels |
Reorder index levels |
df.reorder_levels([1,0]) |
replace |
Replace values |
df.replace(old, new) |
resample |
Resample time-series data |
df.resample('M').mean() |
reset_index |
Reset index to default |
df.reset_index() |
rfloordiv |
Reverse integer division |
df.rfloordiv(other) |
rmod |
Reverse modulo |
df.rmod(other) |
rmul |
Reverse multiplication |
df.rmul(other) |
rolling |
Rolling window calculations |
df.rolling(window=3).mean() |
round |
Round values |
df.round(2) |
rpow |
Reverse exponentiation |
df.rpow(other) |
rsub |
Reverse subtraction |
df.rsub(other) |
rtruediv |
Reverse floating division |
df.rtruediv(other) |
sample |
Random sample of rows |
df.sample(5) |
select_dtypes |
Select columns by dtype |
df.select_dtypes(include=['number']) |
sem |
Standard error of mean |
df.sem() |
set_axis |
Set axis labels |
df.set_axis(new_labels, axis=1) |
set_flags |
Set flags |
df.set_flags(allows_duplicate_labels=False) |
set_index |
Set DataFrame index |
df.set_index('col') |
shape |
Tuple of dimensions |
df.shape |
shift |
Shift index by periods |
df.shift(1) |
size |
Number of elements |
df.size |
skew |
Unbiased skewness |
df.skew() |
sort_index |
Sort by index |
df.sort_index() |
sort_values |
Sort by values |
df.sort_values('col') |
squeeze |
Squeeze 1D axes |
df.squeeze() |
stack |
Stack columns to rows |
df.stack() |
std |
Standard deviation |
df.std() |
sub |
Subtraction |
df.sub(other) |
subtract |
Subtraction |
df.subtract(other) |
sum |
Sum of values |
df.sum() |
swapaxes |
Swap axes |
df.swapaxes(0, 1) |
swaplevel |
Swap levels in MultiIndex |
df.swaplevel(0, 1) |
tail |
Return last n rows |
df.tail(5) |
take |
Take by positions |
df.take([0, 2, 4]) |
to_clipboard |
Copy to clipboard |
df.to_clipboard() |
to_csv |
Write to CSV |
df.to_csv('file.csv') |
to_dict |
Convert to dictionary |
df.to_dict() |
to_excel |
Write to Excel |
df.to_excel('file.xlsx') |
to_feather |
Write to Feather format |
df.to_feather('file.feather') |
to_gbq |
Write to BigQuery |
df.to_gbq('dataset.table') |
to_hdf |
Write to HDF5 |
df.to_hdf('file.h5', 'key') |
to_html |
Render as HTML table |
df.to_html('file.html') |
to_json |
Write to JSON |
df.to_json('file.json') |
to_latex |
Render as LaTeX table |
df.to_latex() |
to_markdown |
Render as Markdown |
df.to_markdown() |
to_numpy |
Convert to numpy array |
df.to_numpy() |
to_orc |
Write to ORC format |
df.to_orc('file.orc') |
to_parquet |
Write to Parquet |
df.to_parquet('file.parquet') |
to_period |
Convert to PeriodIndex |
df.to_period('M') |
to_pickle |
Serialize to pickle |
df.to_pickle('file.pkl') |
to_records |
Convert to record array |
df.to_records() |
to_sql |
Write to SQL database |
df.to_sql('table', con) |
to_stata |
Write to Stata format |
df.to_stata('file.dta') |
to_string |
Render as string |
print(df.to_string()) |
to_timestamp |
Convert PeriodIndex to DatetimeIndex of timestamps |
df.to_timestamp() |
to_xarray |
Convert to xarray |
df.to_xarray() |
to_xml |
Render as XML |
df.to_xml('file.xml') |
transform |
Apply function and return same shape |
df.transform('sqrt') |
transpose |
Transpose index and columns |
df.transpose() |
truediv |
Floating division |
df.truediv(other) |
truncate |
Truncate before/after |
df.truncate(before='2024-01-01') |
tz_convert |
Convert timezone |
df.tz_convert('US/Eastern') |
tz_localize |
Localize timezone |
df.tz_localize('UTC') |
unstack |
Unstack level to columns |
df.unstack() |
update |
Update with other DataFrame |
df.update(other) |
value_counts |
Count frequency of each distinct row |
df.value_counts() |
values |
Return numpy array |
df.values |
var |
Variance |
df.var() |
where |
Replace values where condition is False |
df.where(df > 0, 0) |
xs |
Cross-section from Series/DataFrame |
df.xs('key', level='level') |