2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
# plotting 2D, 3D
# series, dataframe
# data manipulation
Copyright By Assignmentchef assignmentchef
# data grouping/aggregation/concatenating dataframes
# basic statistics, moving statistics (running average)
# use case: list of FX transactions
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
# line plot
x = np.linspace(0, 1, 20)
plt.plot(x, y, label='plot1')
plt.xlabel('x')
plt.ylabel('y')
plt.title('figure 1')
# scatter plot
x1 = np.random.rand(n)
x2 = x1**2 + 0.2* np.random.normal(size=n)
x3 = x1**2 - x1*x2
plt.scatter(x1, x2, c=x3, cmap='brg', label='plot2')
plt.colorbar()
plt.legend()
Out[3]:
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
fig2 = plt.figure(figsize=(18,5))
plt.subplot(1, 2, 1)
plt.plot(x, y, label='plot1')
plt.subplot(1, 2, 2)
plt.scatter(x1, x2, c=x3, cmap='brg', label='plot2')
Out[4]:
x = np.linspace(-1, 1, 3) # [-1, 0, 1]
y = np.linspace(11, 14, 4) # [11, 12, 13, 14]
X, Y = np.meshgrid(x, y) # returns two 2D grids
X
Out[5]: array([[-1., 0., 1.], [-1., 0., 1.], [-1., 0., 1.],
[-1.,0.,1.]])
Out[6]: array([[11., 11., 11.], [12., 12., 12.], [13., 13., 13.],
[14., 14., 14.]])
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
fig3 = plt.figure(figsize=(18,5))
ax = fig3.add_subplot(projection='3d')
x = np.linspace(-1, 1, 20)
y = np.linspace(-1, 1, 20)
X, Y = np.meshgrid(x, y)
Z = 3*X**2 + Y**2
ax.plot_surface(X, Y, Z, cmap='jet')
2022-02-25    10
2022-02-26    11
2022-02-27    12
2022-02-28    13
2022-03-01    14
Freq: D, Name: my series, dtype: int64
fig.savefig('plot1.png')
import pandas as pd
# pandas built on top of numpy
# Series: 1D array
a = np.arange(10, 15)
s = pd.Series(a, name='my series')
s.index = pd.date_range('2022/02/25', periods=5)
s #, s.name
# access data in series
# numpy: s[0]
s.loc['2022-02-27'], s.iloc[2]
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
# slicing by year or month
s.loc['2022-02']
2022-02-25    10
2022-02-26    11
2022-02-27    12
2022-02-28    13
Freq: D, Name: my series, dtype: int64
# dataframe 2D array
row_labels = ['a', 'b', 'c']
col_labels = ['Open', 'Close']
x = np.random.randint(10, 20, size=(3, 2))
df = pd.DataFrame(data=x, index=row_labels, columns=col_labels)
df
   Open  Close
a    11     16
b    17     18
c    19     14
Name: Close, dtype: int64
Open     17
Close    18
Name: b, dtype: int64
   Open  Close  change
a    11     16       5
b    17     18       1
c    19     14      -5
# data selection by label, position
df.loc['c', 'Close'], df.iloc[1,1]
# select column
df['Close']
# select row
df.loc['b']
# derive data from existing data
df['change'] = df['Close'] - df['Open']
df
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
# insert new rows
df.loc['a+b'] = df.loc['a'] + df.loc['b']
df
     Open  Close  change
a      11     16       5
b      17     18       1
c      19     14      -5
a+b    28     34       6

   Open  Close  change
a    11     16       5
b    17     18       1
c    19     14      -5

     Open  Close  change
a      11     16       5
b      17     18       1
c      19     14      -5
a+b    28     34       6

     Open  Close  change
a      11     16       5
a+b    28     34       6
# drop rows or columns
df1 = df.drop('a+b') #, axis=0) # returns a new dataframe
df1
# insert column: df.insert
# query by criteria
df.query('change > 0')
df.query('Open > 10 and change/Open > 0.1')
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
# basic statistic
df.describe()
Out[24]: Open Close
25.3 29.2 5.7
            Open      Close   change
count   4.000000   4.000000  4.00000
mean   18.750000  20.500000  1.75000
std     7.041543   9.146948  4.99166
min    11.000000  14.000000 -5.00000
25%    15.500000  15.500000 -0.50000
50%    18.000000  17.000000  3.00000
75%    21.250000  22.000000  5.25000
max    28.000000  34.000000  6.00000
df.quantile(0.9)
Name: 0.9, dtype: float64
# fill missing values
df.loc['c', 'Close'] = np.nan
df.loc['a', 'Open'] = np.nan
df2 = df.fillna(method='ffill') # forward fill, back fill, # return a new dataframe
df2 = df2.fillna(method='bfill')
Open Close
a 17.0 b 17.0 c 19.0
16.0 18.0 18.0 34.0
fileloc = 'fx_trades.csv'
d = pd.read_csv(fileloc)
d.head(3) # tail()
    TradeID  Type       Date  Ccy      Amt
0  FX200171  Spot  2/16/2021  EUR  1000000
1  FX200171  Spot  2/16/2021  USD -1100000
2  FX200172  Spot  2/16/2021  GBP -1000000
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
g1 = d.groupby(['Ccy']) # holder of info about how to do grouping, reference to orig
g1.get_group('EUR')
     TradeID     Type       Date  Ccy      Amt
0   FX200171     Spot  2/16/2021  EUR  1000000
3   FX200172     Spot  2/16/2021  EUR  1180000
8   FX200175      NDF  4/16/2021  EUR  -200000
15  FX200178  Forward  5/16/2021  EUR -1170000
16  FX200179  Forward  5/16/2021  EUR -1000000
Out[30]: {'BRL': [7, 9], 'CAD': [11], 'EUR': [0, 3, 8, 15, 16], 'GBP': [2, 4, 12, 14], 'USD': [1, 5, 6, 10, 13, 17]}
BRL     -349000
CAD     6750000
EUR     -190000
GBP           0
USD    -4660000
g2 = d.groupby(['Ccy', 'Date'])
s = g2.sum() # aggregation of cashflows by ccy by date, return a dataframe
Out[33]: Amt
Name: (EUR, 5/16/2021), dtype: int64
s.loc['EUR', '5/16/2021']
s.to_csv('summed.csv')
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
2/25/22, 10:09 PM GR5260_Class6 Jupyter Notebook
{('BRL', '4/16/2021'): [7, 9], ('CAD', '5/16/2021'): [11],
 ('EUR', '2/16/2021'): [0, 3], ('EUR', '4/16/2021'): [8], ('EUR', '5/16/2021'): [15, 16],
 ('GBP', '2/16/2021'): [2], ('GBP', '3/16/2021'): [4], ('GBP', '5/16/2021'): [12, 14],
 ('USD', '2/16/2021'): [1], ('USD', '3/16/2021'): [5], ('USD', '4/16/2021'): [6],
 ('USD', '5/16/2021'): [10, 13, 17]}
localhost:8888/notebooks/2022_GR5260/GR5260_Class6.ipynb
CS: assignmentchef QQ: 1823890830 Email: [email protected]
Reviews
There are no reviews yet.