In this note, a general dataframe is called df (type pandas.core.frame.DataFrame), and a general series is called s (type pandas.core.series.Series).
import pandas as pd
import numpy as np # import numpy if necessary
.csv file
# READ
df = pd.read_csv('filename.csv', sep=';') # default sep=','
# if the 1st column already contains the index (0,1,2,...), use it as the index (columns are 0-based)
df = pd.read_csv('filename.csv', index_col=0)
# with datetime info
df = pd.read_csv(PATH_DATA_FOLDER+"raw_data.csv",
parse_dates=['timestamp'],
infer_datetime_format=True,
cache_dates=True)
# WRITE
df.to_csv(path, index=False) # don't include the index
# FROM A LIST
pd.DataFrame(a_list, columns=['col_name'])
# FROM A DICTIONARY
names = ['John', 'Thi', 'Bi', 'Beo', 'Chang']
ages = [10, 20, 21, 18, 11]
marks = [8, 9, 10, 6, 8]
city = ['Ben Tre', 'Paris', 'Ho Chi Minh Ville', 'New York', 'DC']
my_dict = {'Name':names, 'Ages':ages, 'Marks':marks, 'Place': city}
students = pd.DataFrame(my_dict)
|   | Name | Ages | Marks | Place |
|---|---|---|---|---|
| 0 | John | 10 | 8 | Ben Tre |
| 1 | Thi | 20 | 9 | Paris |
| 2 | Bi | 21 | 10 | Ho Chi Minh Ville |
| 3 | Beo | 18 | 6 | New York |
| 4 | Chang | 11 | 8 | DC |
# a column
df['new_col'] = [new_values]
# a row
df.loc['new_index'] = [new_value]
# add a new col based on another's values
df_im = df0.copy()[['col']]
df_im['status'] = df0['col'].apply(lambda row: 1 if row>=80 else 0)
# shuffle all rows and reset the index
df_new = df.sample(frac=1).reset_index(drop=True)
df.sort_values(by='col1', ascending=False)
👉 Indexing and selecting data — pandas 1.1.2 documentation