In [1]:
import pandas as pd
In [2]:
# Toy employee table used by the cells that follow. Each keyword below is a
# column name; the lists are aligned by position, so element i of every list
# describes the same person.
data = dict(
    Name=['John', 'Jane', 'Mike', 'Emily', 'Tom', 'Alex', 'Sara'],
    Age=[25, 31, 45, 19, 27, 33, 28],
    City=['New York', 'London', 'Paris', 'Tokyo', 'Sydney', 'New York', 'New York'],
    Salary=[50000, 75000, 90000, 40000, 60000, 80000, 65000],
)

# One row per person, one column per key of `data`.
df = pd.DataFrame(data)
In [3]:
# Select a single column with single brackets. The result is a pandas Series
# (index + values + dtype), not a Python list; call .tolist() on it if a
# plain list is needed.
df['Name']
Out[3]:
0     John
1     Jane
2     Mike
3    Emily
4      Tom
5     Alex
6     Sara
Name: Name, dtype: object
In [4]:
# Select a single column with double brackets, i.e. by passing a list of
# column names. Because the key is a list, the result is a one-column
# DataFrame rather than a Series.
df[['Name']]
Out[4]:
Name
0 John
1 Jane
2 Mike
3 Emily
4 Tom
5 Alex
6 Sara
In [5]:
# Select several columns at once by passing a list of column names; the
# result is a DataFrame containing those columns in the order listed.
df[['Name', 'City']]
Out[5]:
Name City
0 John New York
1 Jane London
2 Mike Paris
3 Emily Tokyo
4 Tom Sydney
5 Alex New York
6 Sara New York
In [6]:
# Get the distinct values of a column as a NumPy array, listed in order of
# first appearance (each value appears once, however often it occurs).
df['City'].unique()
Out[6]:
array(['New York', 'London', 'Paris', 'Tokyo', 'Sydney'], dtype=object)
In [7]:
# Count how many times each distinct value occurs, most frequent first.
# Alternative: df[['City']].value_counts() # value_counts called on a one-column DataFrame
df['City'].value_counts() # value_counts of a specific column (a Series)
Out[7]:
New York    3
London      1
Paris       1
Tokyo       1
Sydney      1
Name: City, dtype: int64
In [8]:
# Boolean-mask filter on a string column: keep the rows whose City contains
# the substring 'New York'.
#   regex=False -> treat the search term as literal text, so characters such
#                  as '.' or '(' in a term are not interpreted as regex syntax
#                  (str.contains uses regex matching by default).
#   na=False    -> a missing City counts as "no match" instead of putting a
#                  missing value into the mask, which boolean indexing rejects.
df[df['City'].str.contains('New York', regex=False, na=False)]
Out[8]:
Name Age City Salary
0 John 25 New York 50000
5 Alex 33 New York 80000
6 Sara 28 New York 65000
In [9]:
# Boolean-mask filter on a numeric column: the comparison yields one
# True/False per row, and indexing with it keeps the rows where Salary is
# at least 70000.
df[df['Salary'] >= 70000]
Out[9]:
Name Age City Salary
1 Jane 31 London 75000
2 Mike 45 Paris 90000
5 Alex 33 New York 80000
In [10]:
# Boolean-mask filter on several numeric columns: keep rows where Salary is
# at least 70000 AND Age is below 35. Each comparison must stay in its own
# parentheses because `&` binds more tightly than `>=` and `<`.
df[
    (df['Salary'] >= 70000)
    & (df['Age'] < 35)
]
Out[10]:
Name Age City Salary
1 Jane 31 London 75000
5 Alex 33 New York 80000
In [12]:
# Boolean-mask filter on missing data: keep only the rows whose Salary is
# present, i.e. not a missing value such as NaN or None.
# notna() is the same check as notnull().
df[df['Salary'].notna()]
Out[12]:
Name Age City Salary
0 John 25 New York 50000
1 Jane 31 London 75000
2 Mike 45 Paris 90000
3 Emily 19 Tokyo 40000
4 Tom 27 Sydney 60000
5 Alex 33 New York 80000
6 Sara 28 New York 65000