# Dummy coding, reference levels and contrasts in linear models ----
#
# Fits the same regression of carapace length (CL) on frontal lobe size (FL)
# and sex under four different parameterisations of the factor `sex`. The
# fitted values of all four models are identical; only the meaning of the
# coefficients changes.

# MASS provides the `crabs` data set (morphological measurements on crabs).
library(MASS)
data(crabs)

# Open the documentation for model.matrix(), which builds the design matrix of
# a linear model. Only useful in an interactive session, so skip it in batch
# runs.
if (interactive()) {
  help(model.matrix)
}

# Design matrix ----
# model.matrix() expands the formula into numeric columns: an intercept, FL,
# and a dummy (indicator) column for the non-reference level of `sex`.
X <- model.matrix(CL ~ FL + sex, data = crabs)
X # Print the design matrix.

# Model 1: default treatment contrasts ----
# The first factor level ("F", alphabetically first) is the reference level,
# so the `sexM` coefficient is the male-minus-female difference in intercept.
model1 <- lm(CL ~ FL + sex, data = crabs)
summary(model1)

# Model 2: "M" as the reference level ----
# relevel() moves "M" to the front of the levels, making it the baseline.
# NOTE: this modifies `crabs$sex` for everything that follows.
crabs$sex <- relevel(crabs$sex, "M")
model2 <- lm(CL ~ FL + sex, data = crabs)
# The sex coefficient is now `sexF` (female minus male), i.e. the negative of
# model1's `sexM`.
summary(model2)

# Model 3: no intercept ----
# With `- 1` the intercept is dropped and `sex` gets one indicator column per
# level, so `sexM` and `sexF` are the per-sex intercepts rather than a
# baseline plus a difference. For a model without an intercept summary()
# computes R-squared about zero instead of about the mean, so it is not
# comparable with the R-squared of the other models.
model3 <- lm(CL ~ FL + sex - 1, data = crabs)
summary(model3)

# Model 4: sum-to-zero contrasts ----
# contr.sum() codes the levels so that their effects sum to zero. The
# intercept becomes the unweighted average of the per-sex intercepts and the
# single sex coefficient (`sex1`) is the deviation of the first level ("M")
# from that average; the deviation for "F" is its negative.
contrasts(crabs$sex) <- contr.sum(levels(crabs$sex))
model4 <- lm(CL ~ FL + sex, data = crabs)
summary(model4)