import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
# Load the crop-yield table; the path is resolved against the working directory.
crop = pd.read_csv('yield_df.csv')

# Exploratory look at the data. These bare expressions only render output when
# each one is the last statement of a notebook cell; in a plain script they
# are evaluated and discarded.
crop.shape
crop.describe()  # must be called: a bare `crop.describe` only references the method
crop.columns.tolist()

# Remove the 'Unnamed: 0' column (presumably a row index written out with the
# CSV -- confirm). errors='ignore' keeps this step safe to re-run once the
# column is already gone, instead of raising KeyError.
crop.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')

crop.isnull().sum()  # missing-value count per column
crop.head(5)
crop['Year'].max()  # latest year present in the data
# Dropdown selectors, one per filter column, offering each distinct value
# found in the corresponding column of `crop`.
area_dropdown = widgets.Dropdown(
    description='Area:',
    options=crop['Area'].unique(),
)
item_dropdown = widgets.Dropdown(
    description='Item:',
    options=crop['Item'].unique(),
)
def plotdata(area, item):
    """Plot yield against year for one item in one area.

    Filters the module-level ``crop`` frame down to the rows whose ``Area``
    and ``Item`` columns equal the given values, then draws ``hg/ha_yield``
    over ``Year`` in a new 10x6 figure and shows it.

    Args:
        area: Value to match against the ``Area`` column.
        item: Value to match against the ``Item`` column.
    """
    selection = crop[(crop['Area'] == area) & (crop['Item'] == item)]

    # plt.plot joins points in row order, so order the rows chronologically
    # first; otherwise the line can double back on itself. A stable sort keeps
    # the original relative order of rows that share a year.
    selection = selection.sort_values('Year', kind='mergesort')

    plt.figure(figsize=(10, 6))
    plt.plot(selection['Year'], selection['hg/ha_yield'])
    plt.title(f'Yield(HG/HA) by year for {item} in {area}')
    plt.xlabel('Year')
    plt.ylabel("Yield(HG/HA)")
    plt.grid(True)
    plt.show()
# Bind the two dropdowns to plotdata and render the resulting control
# explicitly: a bare expression is only shown when it happens to be the last
# statement of a notebook cell, and is silently discarded otherwise.
display(widgets.interactive(plotdata, area=area_dropdown, item=item_dropdown))
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, confusion_matrix
# Target is the yield column; the features are every other column of `crop`.
y = crop['hg/ha_yield']
X = crop.drop(columns='hg/ha_yield')

# Hold out a quarter of the rows for evaluation; the fixed seed makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
# Column groups routed to the two preprocessing branches below.
categorical_features = ['Area', 'Item']
numerical_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']

# Standardise the numeric columns and one-hot encode the categorical ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        # handle_unknown='ignore': a category that appears only in the
        # held-out rows is encoded as all zeros instead of making
        # transform()/predict() raise.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)
# (display name, unfitted estimator) pairs, evaluated in this order.
# The two tree-based estimators are seeded so repeated runs give the same fit.
models = [
    ("Linear Regression", LinearRegression()),
    ("Decision Tree", DecisionTreeRegressor(random_state=42)),
    ("Random Forest", RandomForestRegressor(random_state=42)),
    ("Support Vector Machine", SVR()),
]
# Fit each candidate on the training split and report its error metrics on
# the held-out split.
for name, model in models:
    # Chain the shared preprocessing step with the current estimator.
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # One print call, one line per entry; the trailing '\n' on the last entry
    # leaves a blank line between consecutive model reports.
    print(
        f'{name}:',
        f'Mean Squared Error: {mse}',
        f'Mean Absolute Error: {mae}',
        f'R^2 Score: {r2}\n',
        sep='\n',
    )