def grm_mml(dataset, options=None)

Estimate parameters for the graded response model.

Estimate the discrimination and difficulty parameters for a graded response model using marginal maximum likelihood.

Parameters:
    dataset: [n_items, n_participants] 2d array of measured responses
    options: dictionary with updates to default options

Returns:
    discrimination: (1d array) estimate of item discriminations
    difficulty: (2d array) estimates of item difficulties by item thresholds
import numpy as np
from scipy import integrate
from scipy.optimize import fminbound

# NOTE: validate_estimation_options, condition_polytomous_response,
# _get_quadrature_points, _solve_for_constants, _solve_integral_equations,
# and _graded_partial_integral are assumed to be importable from the
# library's own utility modules.


def grm_mml(dataset, options=None):
"""Estimate parameters for graded response model.
Estimate the discrimination and difficulty parameters for
a graded response model using marginal maximum likelihood.
Args:
dataset: [n_items, n_participants] 2d array of measured responses
options: dictionary with updates to default options
Returns:
discrimination: (1d array) estimate of item discriminations
        difficulty: (2d array) estimates of item difficulties by item thresholds
Options:
* max_iteration: int
* distribution: callable
* quadrature_bounds: (float, float)
* quadrature_n: int
"""
options = validate_estimation_options(options)
quad_start, quad_stop = options['quadrature_bounds']
quad_n = options['quadrature_n']
responses, item_counts = condition_polytomous_response(dataset, trim_ends=False)
n_items = responses.shape[0]
# Interpolation Locations
theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
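    # Latent ability distribution evaluated at the quadrature points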
distribution = options['distribution'](theta)
# Compute the values needed for integral equations
integral_counts = list()
for ndx in range(n_items):
temp_output = _solve_for_constants(responses[ndx])
integral_counts.append(temp_output)
# Initialize difficulty parameters for estimation
betas = np.full((item_counts.sum(),), -10000.0)
discrimination = np.ones_like(betas)
cumulative_item_counts = item_counts.cumsum()
start_indices = np.roll(cumulative_item_counts, 1)
start_indices[0] = 0
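    # Seed each item's free thresholds with evenly spaced starting values;
    # the first slot per item keeps -10000 as a lower sentinel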
for ndx in range(n_items):
end_ndx = cumulative_item_counts[ndx]
start_ndx = start_indices[ndx] + 1
betas[start_ndx:end_ndx] = np.linspace(-1, 1,
item_counts[ndx] - 1)
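    # betas_roll pairs each threshold with the one above it; the last slot
    # of each item is set to +10000 as an upper sentinel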
betas_roll = np.roll(betas, -1)
betas_roll[cumulative_item_counts-1] = 10000
#############
# 1. Start the iteration loop
# 2. estimate discrimination
# 3. solve for difficulties
# 4. minimize and repeat
#############
for iteration in range(options['max_iteration']):
previous_discrimination = discrimination.copy()
previous_betas = betas.copy()
previous_betas_roll = betas_roll.copy()
# Quadrature evaluation for values that do not change
# This is done during the outer loop to address rounding errors
partial_int = _graded_partial_integral(theta, betas, betas_roll,
discrimination, responses)
partial_int *= distribution
for item_ndx in range(n_items):
# pylint: disable=cell-var-from-loop
# Indices into linearized difficulty parameters
start_ndx = start_indices[item_ndx]
end_ndx = cumulative_item_counts[item_ndx]
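            # Divide out this item's contribution (computed with the previous
            # parameter values) so only the other items' terms remain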
old_values = _graded_partial_integral(theta, previous_betas,
previous_betas_roll,
previous_discrimination,
responses[item_ndx][None, :])
partial_int /= old_values
def _local_min_func(estimate):
                # Solve integrals for difficulty estimates
new_betas = _solve_integral_equations(estimate,
integral_counts[item_ndx],
distribution,
theta)
betas[start_ndx+1:end_ndx] = new_betas
betas_roll[start_ndx:end_ndx-1] = new_betas
discrimination[start_ndx:end_ndx] = estimate
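                # Score the candidate discrimination: combine this item's
                # updated term with the other items' integral and return the
                # negative marginal log-likelihood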
new_values = _graded_partial_integral(theta, betas, betas_roll,
discrimination,
responses[item_ndx][None, :])
new_values *= partial_int
otpt = integrate.fixed_quad(
lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]
return -np.log(otpt).sum()
# Univariate minimization for discrimination parameter
fminbound(_local_min_func, 0.2, 5.0)
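            # The closure above wrote the searched parameters into betas,
            # betas_roll, and discrimination; recompute this item's term and
            # fold it back into the running integral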
new_values = _graded_partial_integral(theta, betas, betas_roll,
discrimination,
responses[item_ndx][None, :])
partial_int *= new_values
if np.abs(previous_discrimination - discrimination).max() < 1e-3:
break
# Trim difficulties to conform to standard output
# TODO: look where missing values are and place NAN there instead
# of appending them to the end
output_betas = np.full((n_items, item_counts.max()-1), np.nan)
for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)):
output_betas[ndx, :end_ndx-start_ndx-1] = betas[start_ndx+1:end_ndx]
return discrimination[start_indices], output_betas
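
A minimal usage sketch follows. The synthetic dataset, its shape, and the seed are illustrative assumptions; any [n_items, n_participants] integer array of graded responses can be passed, and options can be omitted to use the defaults.

import numpy as np

# Hypothetical data: 5 items scored 1-4 by 500 participants (random, for illustration only)
rng = np.random.default_rng(0)
synthetic_responses = rng.integers(1, 5, size=(5, 500))

discrimination, difficulty = grm_mml(synthetic_responses)

print(discrimination.shape)  # expected: (5,)
print(difficulty.shape)      # expected: (5, 3) when every item shows all four categories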