def pcm_mml(dataset, options=None)
Estimate parameters for partial credit model.
Estimate the discrimination and difficulty parameters for the partial credit model using marginal maximum likelihood.
Parameters:
    dataset: [n_items, n_participants] 2d array of measured responses
    options: dictionary with updates to default options

Returns:
    discrimination: (1d array) estimates of item discrimination
    difficulty: (2d array) estimates of item difficulties x item thresholds
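A minimal usage sketch. The random response matrix below is purely illustrative; any integer-coded [n_items, n_participants] array of ordinal responses is handled the same way, and pcm_mml is assumed to be available as defined in the source listing below.

import numpy as np

# Illustrative data: 10 items scored in 4 ordinal categories (0-3) by
# 500 participants. Replace this random matrix with real measured responses.
rng = np.random.default_rng(0)
dataset = rng.integers(0, 4, size=(10, 500))

discrimination, difficulty = pcm_mml(dataset)
# e.g. pcm_mml(dataset, {'quadrature_n': 41}) to override a default option
print(discrimination.shape)  # (10,)   one discrimination per item
print(difficulty.shape)      # (10, 3) threshold estimates per item for this data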
import numpy as np
from scipy import integrate
from scipy.optimize import fmin_slsqp

# The helpers used below (validate_estimation_options,
# condition_polytomous_response, _get_quadrature_points,
# _credit_partial_integral) are internal utilities of the surrounding
# library and are assumed to be importable alongside this function.


def pcm_mml(dataset, options=None):
"""Estimate parameters for partial credit model.
Estimate the discrimination and difficulty parameters for
the partial credit model using marginal maximum likelihood.
Args:
dataset: [n_items, n_participants] 2d array of measured responses
options: dictionary with updates to default options
Returns:
discrimination: (1d array) estimates of item discrimination
difficulty: (2d array) estimates of item difficulties x item thresholds
Options:
* max_iteration: int
* distribution: callable
* quadrature_bounds: (float, float)
* quadrature_n: int
"""
options = validate_estimation_options(options)
quad_start, quad_stop = options['quadrature_bounds']
quad_n = options['quadrature_n']
responses, item_counts = condition_polytomous_response(dataset, trim_ends=False,
_reference=0.0)
n_items = responses.shape[0]
    # Quadrature locations over the latent trait
theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
distribution = options['distribution'](theta)
# Initialize difficulty parameters for estimation
betas = np.full((n_items, item_counts.max()), np.nan)
discrimination = np.ones((n_items,))
partial_int = np.ones((responses.shape[1], theta.size))
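    # partial_int[j, q] will hold, for participant j and quadrature node q,
    # the latent density times the likelihood of that participant's responses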
# Not all items need to have the same
# number of response categories
betas[:, 0] = 0
for ndx in range(n_items):
betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1, item_counts[ndx]-1)
    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination / difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
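    # The per-item objective (holding all other items fixed) is the negative
    # marginal log-likelihood
    #   -sum_j log( integral  prod_i P(X_ij = x_ij | theta) g(theta) dtheta )
    # where _credit_partial_integral is assumed to supply partial credit
    # category probabilities of the form
    #   P(X = k | theta)  proportional to  exp( sum_{v=0..k} a * (theta - b_v) ),
    # normalized over an item's categories, with b_0 fixed at 0 (betas[:, 0]).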
for iteration in range(options['max_iteration']):
previous_discrimination = discrimination.copy()
previous_betas = betas.copy()
        # Reset the per-participant integrand to the latent distribution.
        # Recomputing it at the top of every outer iteration keeps rounding
        # error from accumulating and reuses the existing buffer for speed.
        partial_int[:] = distribution[None, :]
for item_ndx in range(n_items):
partial_int *= _credit_partial_integral(theta, betas[item_ndx],
discrimination[item_ndx],
responses[item_ndx])
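        # partial_int now contains the complete marginal-likelihood integrand
        # (latent density times every item's response probabilities) at each
        # quadrature node for each participant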
# Loop over each item and solve for the alpha / beta parameters
for item_ndx in range(n_items):
# pylint: disable=cell-var-from-loop
item_length = item_counts[item_ndx]
            new_betas = np.zeros((item_length,))
            # Divide this item's previous contribution out of the joint
            # integrand before re-estimating its parameters
old_values = _credit_partial_integral(theta, previous_betas[item_ndx],
previous_discrimination[item_ndx],
responses[item_ndx])
partial_int /= old_values
            def _local_min_func(estimate):
                # estimate[0] is the item discrimination; estimate[1:] are the
                # free thresholds (the first threshold stays fixed at zero)
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]
                # Negative marginal log-likelihood summed over participants
                return -np.log(otpt).sum()
# Initial Guess of Item Parameters
initial_guess = np.concatenate(([discrimination[item_ndx]],
betas[item_ndx, 1:item_length]))
otpt = fmin_slsqp(_local_min_func, initial_guess,
disp=False,
bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1))
discrimination[item_ndx] = otpt[0]
betas[item_ndx, 1:item_length] = otpt[1:]
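            # Fold the item's updated contribution back into the joint integrand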
new_values = _credit_partial_integral(theta, betas[item_ndx],
discrimination[item_ndx],
responses[item_ndx])
partial_int *= new_values
if np.abs(previous_discrimination - discrimination).max() < 1e-3:
break
# TODO: look where missing values are and place NAN there instead
# of appending them to the end
return discrimination, betas[:, 1:]
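Note that items with fewer response categories than the widest item keep NaN in the unused trailing threshold slots of the returned difficulty array; the TODO above refers to repositioning those placeholders rather than leaving them at the end of each row.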