import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the CSV; index_col=0 uses the file's first column as the row index.
df = pd.read_csv("../data/lidar_data_with_audio_timestamps_oct_28.csv", index_col=0)
# Keep only these four columns.
df = df[["height", "width", "datetime_enter", "datetime_leave"]]
# NOTE(review): bare expressions such as `df` only display output in a
# notebook/REPL; presumably this file is a notebook export -- confirm.
df
# Read one column
df["height"]
# Read multiple columns
df[["height", "width"]]
# Read one row, selected by its index label (113) rather than by position
df.loc[113]
# Read all rows that fulfill a condition (height greater than 200)
df[df["height"] > 200]
#...
# Get the first 10 rows
df.head(10)
# Get the last 10 rows
df.tail(10)
# Some info about the data
df.info()
# Count how often each distinct height value occurs
df["height"].value_counts()
# Plot some columns: a 6-bin histogram of height, then a height-vs-width scatter plot
df["height"].plot(kind="hist", bins=6)
df.plot(kind="scatter", x="height", y="width", xlabel="Height", ylabel="Width")
# Create a column: the element-wise product of height and width
df["area"] = df["height"] * df["width"]
df
# Create a label column by applying a function to each row
def create_label(row):
    """Return "big" if the row's "height" value is greater than 200, else "small".

    ``row`` is anything indexable by the key "height" (for example one row
    of a DataFrame, as passed by ``DataFrame.apply`` with ``axis=1``).
    """
    return "big" if row["height"] > 200 else "small"
# Call create_label once per row (axis="columns" is the named form of axis=1)
# and store the returned strings in a new "label" column.
df["label"] = df.apply(create_label, axis="columns")
df
# Number of rows carrying each label
df["label"].value_counts()