MeasureJackknife

measureia.MeasureJackknife

Bases: MeasureWBox, MeasureMultipolesBox, MeasureWLightcone, MeasureMultipolesLightcone

Class that contains all methods for jackknife covariance measurements for IA correlation functions.

Methods:

_measure_jackknife_covariance_sims
	Measures all jackknife realisations for MeasureIABox and combines into covariance using 1 or more CPUs.
_measure_jackknife_covariance_sims_multiprocessing
	Measures all jackknife realisations for MeasureIABox and combines into covariance using >1 CPU.
_measure_jackknife_realisations_obs
	Measures all jackknife realisations for MeasureIALightcone using 1 or more CPUs.
_measure_jackknife_covariance_obs
	Combines jackknife realisations for MeasureIALightcone into covariance.
_measure_jackknife_realisations_obs_multiprocessing
	Measures all jackknife realisations for MeasureIALightcone using >1 CPU.
measure_covariance_multiple_datasets
	Given the jackknife realisations of two datasets, creates the cross covariance.
create_full_cov_matrix_projections
	Creates larger covariance matrix of multiple datasets including cross terms.

Notes

Inherits attributes from 'SimInfo', where 'boxsize', 'L_0p5' and 'snap_group' are used in this class. Inherits attributes from 'MeasureIABase', where 'data', 'output_file_name', 'periodicity', 'Num_position', 'Num_shape', 'r_min', 'r_max', 'num_bins_r', 'num_bins_pi', 'r_bins', 'pi_bins', 'mu_r_bins' are used.

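All covariance-building methods in this class combine the jackknife realisations in the same way: the N leave-one-out datavectors are averaged, and the covariance is the scatter around that mean scaled by (N - 1)/N (see Singh 2023, as referenced in the source below). A minimal numpy sketch of that combination step, with a hypothetical array standing in for the realisations read from the output file:

import numpy as np

# Hypothetical stand-in for the N leave-one-out datavectors that the
# _measure_jackknife_* methods store in the HDF5 output file.
num_box, num_bins_r = 27, 8
realisations = np.random.default_rng(0).normal(size=(num_box, num_bins_r))

mean = realisations.mean(axis=0)
diff = realisations - mean
cov = (num_box - 1) / num_box * diff.T @ diff  # jackknife covariance
std = np.sqrt(np.diag(cov))  # size of errorbars (sqrt of diag of cov)
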
Source code in src/measureia/measure_jackknife.py
class MeasureJackknife(MeasureWBox, MeasureMultipolesBox, MeasureWLightcone,
					   MeasureMultipolesLightcone):
	"""Class that contains all methods for jackknife covariance measurements for IA correlation functions.

	Methods
	-------
	_measure_jackknife_covariance_sims()
		Measures all jackknife realisations for MeasureIABox and combines into covariance using 1 or more CPUs.
	_measure_jackknife_covariance_sims_multiprocessing()
		Measures all jackknife realisations for MeasureIABox and combines into covariance using >1 CPU.
	_measure_jackknife_realisations_obs()
		Measures all jackknife realisations for MeasureIALightcone using 1 or more CPUs.
	_measure_jackknife_covariance_obs()
		Combines jackknife realisations for MeasureIALightcone into covariance.
	_measure_jackknife_realisations_obs_multiprocessing()
		Measures all jackknife realisations for MeasureIALightcone using >1 CPU.
	measure_covariance_multiple_datasets()
		Given the jackknife realisations of two datasets, creates the cross covariance.
	create_full_cov_matrix_projections()
		Creates larger covariance matrix of multiple datasets including cross terms.

	Notes
	-----
	Inherits attributes from 'SimInfo', where 'boxsize', 'L_0p5' and 'snap_group' are used in this class.
	Inherits attributes from 'MeasureIABase', where 'data', 'output_file_name', 'periodicity', 'Num_position',
	'Num_shape', 'r_min', 'r_max', 'num_bins_r', 'num_bins_pi', 'r_bins', 'pi_bins', 'mu_r_bins' are used.
	"""

	def __init__(
			self,
			data,
			output_file_name,
			simulation=None,
			snapshot=None,
			separation_limits=[0.1, 20.0],
			num_bins_r=8,
			num_bins_pi=20,
			pi_max=None,
			boxsize=None,
			periodicity=True,
	):
		"""
		The __init__ method of the MeasureJackknife class.

		Notes
		-----
		Constructor parameters 'data', 'output_file_name', 'simulation', 'snapshot', 'separation_limits', 'num_bins_r',
		'num_bins_pi', 'pi_max', 'boxsize' and 'periodicity' are passed to MeasureIABase.

		"""
		super().__init__(data, output_file_name, simulation, snapshot, separation_limits, num_bins_r, num_bins_pi,
						 pi_max, boxsize, periodicity)
		return

	def _measure_jackknife_covariance_sims(
			self, dataset_name, corr_type, masks=None, L_subboxes=3, rp_cut=None,
			tree_saved=True, file_tree_path=None, remove_tree_file=True, num_nodes=None
	):
		"""Measures the covariance in the projected correlation functions using the jackknife method.
		The box is divided into L_subboxes^3 subboxes; the correlation function is calculated omitting one box at a time.
		Then the covariance is computed by combining the information of all realisations.
		This method uses 1 or more CPUs.

		Parameters
		----------
		dataset_name : str
			Name of the dataset in the output file.
		corr_type : iterable with 2 str entries
			Array with two entries. For the first entry, choose from [gg, g+, both]; for the second, from [w, multipoles].
		masks : dict or NoneType, optional
			See MeasureIABox methods. Default is None.
		L_subboxes : int, optional
			Integer by which the length of the side of the box should be divided. Total number of jackknife realisations
			will be L_subboxes^3. Default is 3.
		rp_cut : float or NoneType, optional
			See MeasureIABox.measure_xi_multipoles. Default is None.
		tree_saved : bool, optional
			If True, a method using trees has been used in the correlation measurement and the tree information is
			saved. Default is True.
		file_tree_path : str or NoneType, optional
			Data path to tree information. See MeasureIABox methods. Default is None.
		remove_tree_file : bool, optional
			If True, the tree information file is removed after the jackknife covariance is measured. Default is True.
		num_nodes : int or NoneType, optional
			Number of cores to be used in multiprocessing. Default is None.

		"""
		if corr_type is None:
			corr_type = ["both", "multipoles"]
		if corr_type[0] == "both":
			data = [corr_type[1] + "_g_plus", corr_type[1] + "_gg"]
		elif corr_type[0] == "g+":
			data = [corr_type[1] + "_g_plus"]
		elif corr_type[0] == "gg":
			data = [corr_type[1] + "_gg"]
		else:
			raise KeyError("Unknown value for corr_type. Choose from [g+, gg, both]")
		figname_dataset_name = dataset_name
		if "/" in dataset_name:
			figname_dataset_name = figname_dataset_name.replace("/", "_")
		if "." in dataset_name:
			figname_dataset_name = figname_dataset_name.replace(".", "p")
		L_sub = self.L_0p5 * 2.0 / L_subboxes
		num_box = 0
		for i in np.arange(0, L_subboxes):
			for j in np.arange(0, L_subboxes):
				for k in np.arange(0, L_subboxes):
					x_bounds = [i * L_sub, (i + 1) * L_sub]
					y_bounds = [j * L_sub, (j + 1) * L_sub]
					z_bounds = [k * L_sub, (k + 1) * L_sub]
					x_mask = (self.data["Position"][:, 0] > x_bounds[0]) * (self.data["Position"][:, 0] < x_bounds[1])
					y_mask = (self.data["Position"][:, 1] > y_bounds[0]) * (self.data["Position"][:, 1] < y_bounds[1])
					z_mask = (self.data["Position"][:, 2] > z_bounds[0]) * (self.data["Position"][:, 2] < z_bounds[1])
					mask_position = np.invert(
						x_mask * y_mask * z_mask
					)  # mask that is True for all positions not in the subbox
					if self.Num_position == self.Num_shape:
						mask_shape = mask_position
					else:
						x_mask = (self.data["Position_shape_sample"][:, 0] > x_bounds[0]) * (
								self.data["Position_shape_sample"][:, 0] < x_bounds[1]
						)
						y_mask = (self.data["Position_shape_sample"][:, 1] > y_bounds[0]) * (
								self.data["Position_shape_sample"][:, 1] < y_bounds[1]
						)
						z_mask = (self.data["Position_shape_sample"][:, 2] > z_bounds[0]) * (
								self.data["Position_shape_sample"][:, 2] < z_bounds[1]
						)
						mask_shape = np.invert(
							x_mask * y_mask * z_mask
						)  # mask that is True for all positions not in the subbox
					if tree_saved:
						if masks != None:
							indices_shape = np.where(mask_shape[masks["Position_shape_sample"]])[0]
							mask_not_position = np.invert(mask_position[masks["Position"]])
							indices_not_position = np.where(mask_not_position)[0]
							masks_total = {
								"Position": masks["Position"],
								"Position_shape_sample": masks["Position_shape_sample"],
								"Axis_Direction": masks["Position_shape_sample"],
								"q": masks["Position_shape_sample"],
								"weight": masks["Position"],
								"weight_shape_sample": masks["Position_shape_sample"],
							}
						else:
							indices_shape = np.where(mask_shape)[0]
							mask_not_position = np.invert(mask_position)
							indices_not_position = np.where(mask_not_position)[0]
							masks_total = None
						tree_input = [indices_not_position, indices_shape]

					else:
						if masks != None:
							mask_position = mask_position * masks["Position"]
							mask_shape = mask_shape * masks["Position_shape_sample"]
						tree_input = None
						masks_total = {
							"Position": mask_position,
							"Position_shape_sample": mask_shape,
							"Axis_Direction": mask_shape,
							"q": mask_shape,
							"weight": mask_position,
							"weight_shape_sample": mask_shape,
						}
					if corr_type[1] == "multipoles":
						if num_nodes == None:
							self._measure_xi_r_mur_sims_tree(
								tree_input=tree_input,
								masks=masks_total,
								rp_cut=rp_cut,
								dataset_name=dataset_name + "_" + str(num_box),
								print_num=False,
								save_tree=False,
								dataset_name_tree=f"m_{self.simname}_tree_{figname_dataset_name}",
								file_tree_path=file_tree_path,
								jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}",
							)
						else:
							self._measure_xi_r_mur_sims_multiprocessing(
								masks=masks_total,
								rp_cut=rp_cut,
								dataset_name=dataset_name + "_" + str(num_box),
								print_num=False,
								num_nodes=num_nodes,
								jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}",
							)
						self._measure_multipoles(corr_type=corr_type[0], dataset_name=dataset_name + "_" + str(num_box),
												 jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}")
					else:
						if num_nodes == None:
							self._measure_xi_rp_pi_sims_tree(
								tree_input=tree_input,
								masks=masks_total,
								dataset_name=dataset_name + "_" + str(num_box),
								print_num=False,
								save_tree=False,
								dataset_name_tree=f"w_{self.simname}_tree_{figname_dataset_name}",
								file_tree_path=file_tree_path,
								jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}",
							)
						else:
							self._measure_xi_rp_pi_sims_multiprocessing(
								masks=masks_total,
								dataset_name=dataset_name + "_" + str(num_box),
								print_num=False,
								num_nodes=num_nodes,
								jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}",
							)
						self._measure_w_g_i(corr_type=corr_type[0], dataset_name=dataset_name + "_" + str(num_box),
											jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}")

					num_box += 1
		if remove_tree_file and tree_saved:
			if corr_type[1] == "multipoles":
				os.remove(
					f"{file_tree_path}/m_{self.simname}_tree_{figname_dataset_name}.pickle")  # removes temp pickle file
			else:
				os.remove(
					f"{file_tree_path}/w_{self.simname}_tree_{figname_dataset_name}.pickle")  # removes temp pickle file
		covs, stds = [], []
		for d in np.arange(0, len(data)):
			data_file = h5py.File(self.output_file_name, "a")
			group_multipoles = data_file[f"{self.snap_group}/{data[d]}/{dataset_name}_jk{num_box}/"]
			# calculating mean of the datavectors
			mean_multipoles = np.zeros(self.num_bins_r)
			for b in np.arange(0, num_box):
				mean_multipoles += group_multipoles[dataset_name + "_" + str(b)]
			mean_multipoles /= num_box

			# calculating the covariance matrix (multipoles) and the standard deviation (sqrt of diag of cov)
			cov = np.zeros((self.num_bins_r, self.num_bins_r))
			std = np.zeros(self.num_bins_r)
			for b in np.arange(0, num_box):
				std += (group_multipoles[dataset_name + "_" + str(b)] - mean_multipoles) ** 2
				for i in np.arange(self.num_bins_r):
					cov[:, i] += (group_multipoles[dataset_name + "_" + str(b)] - mean_multipoles) * (
							group_multipoles[dataset_name + "_" + str(b)][i] - mean_multipoles[i]
					)
			std *= (num_box - 1) / num_box  # see Singh 2023
			std = np.sqrt(std)  # size of errorbars
			cov *= (num_box - 1) / num_box  # cov not sqrt so to get std, sqrt of diag would need to be taken
			data_file.close()
			if self.output_file_name != None:
				output_file = h5py.File(self.output_file_name, "a")
				group_multipoles = create_group_hdf5(output_file, f"{self.snap_group}/" + data[d])
				write_dataset_hdf5(group_multipoles, dataset_name + "_mean_" + str(num_box), data=mean_multipoles)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_" + str(num_box), data=std)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_cov_" + str(num_box), data=cov)
				output_file.close()
			else:
				covs.append(cov)
				stds.append(std)
		if self.output_file_name != None:
			return
		else:
			return covs, stds

	def _measure_jackknife_covariance_sims_multiprocessing(
			self, dataset_name, corr_type, masks=None, L_subboxes=3, rp_cut=None,
			num_nodes=4, twoD=False, tree=True, tree_saved=True, file_tree_path=None, remove_tree_file=True,
			save_jk_terms=False
	):
		"""Measures the covariance in the projected correlation functions using the jackknife method.
		The box is divided into L_subboxes^3 subboxes; the correlation function is calculated omitting one box at a time.
		Then the covariance is computed by combining the information of all realisations.
		This method uses >1 CPU.

		Parameters
		----------
		dataset_name : str
			Name of the dataset in the output file.
		corr_type : iterable with 2 str entries
			Array with two entries. For the first entry, choose from [gg, g+, both]; for the second, from [w, multipoles].
		masks : dict or NoneType, optional
			See MeasureIABox methods. Default is None.
		L_subboxes : int, optional
			Integer by which the length of the side of the box should be divided. Total number of jackknife realisations
			will be L_subboxes^3. Default is 3.
		rp_cut : float or NoneType, optional
			See MeasureIABox.measure_xi_multipoles. Default is None.
		tree_saved : bool, optional
			If True, a method using trees has been used in the correlation measurement and the tree information is
			saved. Default is True.
		file_tree_path : str or NoneType, optional
			Data path to tree information. See MeasureIABox methods. Default is None.
		remove_tree_file : bool, optional
			If True, the tree information file is removed after the jackknife covariance is measured. Default is True.
		num_nodes : int or NoneType, optional
			Number of cores to be used in multiprocessing. Default is 4.
		twoD : bool, optional
			Divide box into L_subboxes^2, no division in z-direction. Default value is False.
		tree : bool, optional
			Manually determine if trees should be used in measurement. Default is True
		save_jk_terms : bool, optional
			If True, the DD and S+D terms are saved for each realisation. Default value is False.

		Returns
		-------

		"""
		if corr_type[0] == "both":
			data = [corr_type[1] + "_g_plus", corr_type[1] + "_gg"]
			corr_type_suff = ["_g_plus", "_gg"]
			jk_terms = ["SplusD", "DD"]
		elif corr_type[0] == "g+":
			data = [corr_type[1] + "_g_plus"]
			corr_type_suff = ["_g_plus"]
			jk_terms = ["SplusD"]
		elif corr_type[0] == "gg":
			data = [corr_type[1] + "_g_plus", corr_type[1] + "_gg"]
			corr_type_suff = ["_g_plus", "_gg"]
			jk_terms = ["SplusD", "DD"]
			corr_type[0] = 'both'  # forced to 'both': gg-only would write wrong data at the end (j, data_j loop)
		# todo: update this to only do gg (count pairs methods) if asked
		# data = [corr_type[1] + "_gg"]
		# corr_type_suff = ["_gg"]
		else:
			raise KeyError("Unknown value for first entry of corr_type. Choose from [g+, gg, both]")

		if corr_type[1] == "multipoles":
			bin_var_names = ["r", "mu_r"]
		elif corr_type[1] == "w":
			bin_var_names = ["rp", "pi"]
		else:
			raise KeyError("Unknown value for second entry of corr_type. Choose from [multipoles, w_g_plus]")
		L_sub = self.boxsize / L_subboxes
		if twoD:
			z_L_sub = 1
		else:
			z_L_sub = L_subboxes
		figname_dataset_name = dataset_name
		if "/" in dataset_name:
			figname_dataset_name = figname_dataset_name.replace("/", "_")
		if "." in dataset_name:
			figname_dataset_name = figname_dataset_name.replace(".", "p")
		num_box = 0
		args_xi_g_plus, args_multipoles, tree_args = [], [], []
		for i in np.arange(0, L_subboxes):
			for j in np.arange(0, L_subboxes):
				for k in np.arange(0, z_L_sub):
					x_bounds = [i * L_sub, (i + 1) * L_sub]
					y_bounds = [j * L_sub, (j + 1) * L_sub]
					if twoD:
						z_bounds = [0.0, self.boxsize]
					else:
						z_bounds = [k * L_sub, (k + 1) * L_sub]
					x_mask = (self.data["Position"][:, 0] > x_bounds[0]) * (self.data["Position"][:, 0] < x_bounds[1])
					y_mask = (self.data["Position"][:, 1] > y_bounds[0]) * (self.data["Position"][:, 1] < y_bounds[1])
					z_mask = (self.data["Position"][:, 2] > z_bounds[0]) * (self.data["Position"][:, 2] < z_bounds[1])
					mask_position = np.invert(
						x_mask * y_mask * z_mask
					)  # mask that is True for all positions not in the subbox
					if self.Num_position == self.Num_shape:
						mask_shape = mask_position
					else:
						x_mask = (self.data["Position_shape_sample"][:, 0] > x_bounds[0]) * (
								self.data["Position_shape_sample"][:, 0] < x_bounds[1]
						)
						y_mask = (self.data["Position_shape_sample"][:, 1] > y_bounds[0]) * (
								self.data["Position_shape_sample"][:, 1] < y_bounds[1]
						)
						z_mask = (self.data["Position_shape_sample"][:, 2] > z_bounds[0]) * (
								self.data["Position_shape_sample"][:, 2] < z_bounds[1]
						)
						mask_shape = np.invert(
							x_mask * y_mask * z_mask
						)  # mask that is True for all positions not in the subbox
					if tree_saved:
						if masks != None:
							indices_shape = np.where(mask_shape[masks["Position_shape_sample"]])[0]
							mask_not_position = np.invert(mask_position[masks["Position"]])
							indices_not_position = np.where(mask_not_position)[0]
							masks_total = {
								"Position": masks["Position"],
								"Position_shape_sample": masks["Position_shape_sample"],
								"Axis_Direction": masks["Position_shape_sample"],
								"q": masks["Position_shape_sample"],
								"weight": masks["Position"],
								"weight_shape_sample": masks["Position_shape_sample"],
							}
						else:
							indices_shape = np.where(mask_shape)[0]
							mask_not_position = np.invert(mask_position)
							indices_not_position = np.where(mask_not_position)[0]
							masks_total = None
						tree_input = [indices_not_position, indices_shape]
					else:
						if masks != None:
							mask_position = mask_position * masks["Position"]
							mask_shape = mask_shape * masks["Position_shape_sample"]
						tree_input = None
						masks_total = {
							"Position": mask_position,
							"Position_shape_sample": mask_shape,
							"Axis_Direction": mask_shape,
							"q": mask_shape,
							"weight": mask_position,
							"weight_shape_sample": mask_shape,
						}
					if corr_type[1] == "multipoles":
						tree_args.append(tree_input)
						args_xi_g_plus.append(
							(
								dataset_name + "_" + str(num_box),
								masks_total,
								rp_cut,
								True,
								False,
								f"m_{self.simname}_tree_{figname_dataset_name}",
								False,
								file_tree_path,
							)
						)
					else:
						tree_args.append(tree_input)
						args_xi_g_plus.append(
							(
								dataset_name + "_" + str(num_box),
								masks_total,
								True,
								False,
								f"w_{self.simname}_tree_{figname_dataset_name}",
								False,
								file_tree_path,
							)
						)
					args_multipoles.append([corr_type[0], dataset_name + "_" + str(num_box)])

					num_box += 1
		args_xi_g_plus = np.array(args_xi_g_plus)
		args_multipoles = np.array(args_multipoles)
		multiproc_chuncks = np.array_split(np.arange(num_box), np.ceil(num_box / num_nodes))
		for chunck in multiproc_chuncks:
			chunck = np.array(chunck, dtype=int)
			if corr_type[1] == "multipoles":
				if tree:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_r_mur_sims_tree,
						tree_args[min(chunck):max(chunck) + 1],
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
						args_xi_g_plus[chunck][:, 6],
						args_xi_g_plus[chunck][:, 7],
					)
				else:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_r_mur_sims_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
					)
			else:
				if tree:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_rp_pi_sims_tree,
						tree_args[min(chunck):max(chunck) + 1],
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
						args_xi_g_plus[chunck][:, 6],
					)

				else:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_rp_pi_sims_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
					)

			output_file = h5py.File(self.output_file_name, "a")
			for i in np.arange(0, len(chunck)):
				for j, data_j in enumerate(data):
					group_xigplus = create_group_hdf5(
						output_file,
						f"{self.snap_group}/{corr_type[1]}/xi{corr_type_suff[j]}/{dataset_name}_jk{L_subboxes ** 3}"
					)
					# correlation, (DD / RR_gg) - 1, separation_bins, pi_bins, Splus_D, DD, RR_g_plus
					write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i]}", data=result[i][j])
					write_dataset_hdf5(
						group_xigplus, f"{dataset_name}_{chunck[i]}_{bin_var_names[0]}", data=result[i][2]
					)
					write_dataset_hdf5(
						group_xigplus, f"{dataset_name}_{chunck[i]}_{bin_var_names[1]}", data=result[i][3]
					)
					if save_jk_terms:
						write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i]}_{jk_terms[j]}",
										   data=result[i][4 + j])
						write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i]}_RR{corr_type_suff[j]}",
										   data=result[i][6])
			# write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i]}_sigmasq", data=result[i][3])
			output_file.close()
		if remove_tree_file and tree_saved:
			if corr_type[1] == "multipoles":
				os.remove(
					f"{file_tree_path}/m_{self.simname}_tree_{figname_dataset_name}.pickle")  # removes temp pickle file
			else:
				os.remove(
					f"{file_tree_path}/w_{self.simname}_tree_{figname_dataset_name}.pickle")  # removes temp pickle file
		for i in np.arange(0, num_box):
			if corr_type[1] == "multipoles":
				self._measure_multipoles(corr_type=args_multipoles[i][0], dataset_name=args_multipoles[i][1],
										 jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}")
			else:
				self._measure_w_g_i(corr_type=args_multipoles[i][0], dataset_name=args_multipoles[i][1],
									jk_group_name=f"{dataset_name}_jk{L_subboxes ** 3}")
		covs, stds = [], []
		for d in np.arange(0, len(data)):
			data_file = h5py.File(self.output_file_name, "a")
			group_multipoles = data_file[f"{self.snap_group}/{data[d]}/{dataset_name}_jk{num_box}"]
			# calculating mean of the datavectors
			mean_multipoles = np.zeros((self.num_bins_r))
			for b in np.arange(0, num_box):
				mean_multipoles += group_multipoles[dataset_name + "_" + str(b)]
			mean_multipoles /= num_box

			# calculating the covariance matrix (multipoles) and the standard deviation (wg+)
			cov = np.zeros((self.num_bins_r, self.num_bins_r))
			std = np.zeros(self.num_bins_r)
			for b in np.arange(0, num_box):
				std += (group_multipoles[dataset_name + "_" + str(b)] - mean_multipoles) ** 2
				for i in np.arange(self.num_bins_r):
					cov[:, i] += (group_multipoles[dataset_name + "_" + str(b)] - mean_multipoles) * (
							group_multipoles[dataset_name + "_" + str(b)][i] - mean_multipoles[i]
					)
			std *= (num_box - 1) / num_box  # see Singh 2023
			std = np.sqrt(std)  # size of errorbars
			cov *= (num_box - 1) / num_box  # cov not sqrt so to get std, sqrt of diag would need to be taken
			data_file.close()
			if self.output_file_name != None:
				output_file = h5py.File(self.output_file_name, "a")
				group_multipoles = create_group_hdf5(output_file, f"{self.snap_group}/" + data[d])
				write_dataset_hdf5(group_multipoles, dataset_name + "_mean_" + str(num_box), data=mean_multipoles)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_" + str(num_box), data=std)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_cov_" + str(num_box), data=cov)
				output_file.close()
			else:
				covs.append(cov)
				stds.append(std)
		if self.output_file_name != None:
			return
		else:
			return covs, stds

	def _measure_jackknife_realisations_obs(
			self, patches_pos, patches_shape, corr_type, dataset_name, masks=None,
			rp_cut=None, over_h=False, cosmology=None, count_pairs=False, data_suffix="", num_sample_names=["S", "D"]
	):
		"""Measures the jackknife realisations for the projected correlation functions in MeasureIALightcone using the
		jackknife method. The area is already divided into patches; the correlation function is calculated omitting one
		patch at a time. This method uses 1 or more CPUs.

		Parameters
		----------
		patches_pos : ndarray
			Array with the patch numbers of each object in the position sample.
		patches_shape : ndarray
			Array with the patch numbers of each object in the shape sample.
		dataset_name : str
			Name of the dataset in the output file.
		corr_type : iterable with 2 str entries
			Array with two entries. For the first entry, choose from [gg, g+, both]; for the second, from [w, multipoles].
		masks : dict or NoneType, optional
			See MeasureIALightcone methods. Default is None.
		rp_cut : float or NoneType, optional
			See MeasureIALightcone.measure_xi_multipoles. Default is None.
		over_h : bool, optional
			See MeasureIALightcone. Default value is False.
		cosmology : pyccl cosmology object or NoneType, optional
			See MeasureIALightcone. Default is None.
		count_pairs : bool, optional
			If True, only gg is measured, not g+. Default value is False.
		data_suffix : str, optional
			Addition to dataset name. Used to distinguish between DR, DD and RR measurements. Default value is "".
		num_sample_names : list with two entries, optional
			Keywords of the num_samples dictionary to access the number of objects in the position and shape samples.
			Default value is ["S", "D"].

		Returns
		-------

		"""
		if count_pairs and data_suffix == "":
			raise ValueError("Enter a data suffix (like _DD) for your pair count.")
		figname_dataset_name = dataset_name
		if "/" in dataset_name:
			figname_dataset_name = figname_dataset_name.replace("/", "_")
		if "." in dataset_name:
			figname_dataset_name = figname_dataset_name.replace(".", "p")

		min_patch, max_patch = int(min(patches_pos)), int(max(patches_pos))
		num_patches = max_patch - min_patch + 1
		if min(patches_shape) != min_patch:
			print(
				"Warning! Minimum patch number of shape sample is not equal to minimum patch number of position sample.")
		if max(patches_shape) != max_patch:
			print(
				"Warning! Maximum patch number of shape sample is not equal to maximum patch number of position sample.")
		print(
			f"Calculating jackknife realisations for {num_patches} patches for {dataset_name}.")

		for i in np.arange(min_patch, max_patch + 1):
			mask_position = (patches_pos != i)
			mask_shape = (patches_shape != i)
			if masks != None:
				mask_position = mask_position * masks["Redshift"]
				mask_shape = mask_shape * masks["Redshift_shape_sample"]
			self.num_samples[f"{i}"][num_sample_names[0]] = sum(mask_shape)
			self.num_samples[f"{i}"][num_sample_names[1]] = sum(mask_position)
			masks_total = {
				"Redshift": mask_position,
				"Redshift_shape_sample": mask_shape,
				"RA": mask_position,
				"RA_shape_sample": mask_shape,
				"DEC": mask_position,
				"DEC_shape_sample": mask_shape,
				"e1": mask_shape,
				"e2": mask_shape,
				"weight": mask_position,
				"weight_shape_sample": mask_shape,
			}
			if corr_type[1] == "multipoles":
				if count_pairs:
					self._count_pairs_xi_r_mur_obs_brute(masks=masks_total, dataset_name=dataset_name + "_" + str(i),
														 over_h=over_h, cosmology=cosmology, print_num=False,
														 data_suffix=data_suffix, rp_cut=rp_cut,
														 jk_group_name=f"{dataset_name}_jk{num_patches}")
				else:
					self._measure_xi_r_mur_obs_brute(
						masks=masks_total,
						rp_cut=rp_cut,
						dataset_name=dataset_name + "_" + str(i),
						print_num=False,
						over_h=over_h,
						cosmology=cosmology,
						jk_group_name=f"{dataset_name}_jk{num_patches}",
					)

			else:
				if count_pairs:
					self._count_pairs_xi_rp_pi_obs_brute(masks=masks_total, dataset_name=dataset_name + "_" + str(i),
														 over_h=over_h, cosmology=cosmology, print_num=False,
														 data_suffix=data_suffix,
														 jk_group_name=f"{dataset_name}_jk{num_patches}")
				else:
					self._measure_xi_rp_pi_obs_brute(
						masks=masks_total,
						dataset_name=dataset_name + "_" + str(i),
						print_num=False,
						over_h=over_h,
						cosmology=cosmology,
						jk_group_name=f"{dataset_name}_jk{num_patches}",
					)
		return

	def _measure_jackknife_covariance_obs(
			self, IA_estimator, corr_type, dataset_name, max_patch, min_patch=1,
			randoms_suf="_randoms"
	):
		"""Combines the jackknife realisations measured with _measure_jackknife_realisations_obs or
		_measure_jackknife_realisations_obs_multiprocessing into a covariance.

		Parameters
		----------
		IA_estimator : str
			Choose which type of xi estimator is used. Choose from "clusters" or "galaxies".
		dataset_name : str
			Name of the dataset in the output file.
		corr_type : iterable with 2 str entries
			Array with two entries. For the first entry, choose from [gg, g+, both]; for the second, from [w, multipoles].
		max_patch : int
			Maximum patch number used.
		min_patch : int, optional
			Minimum patch number used. Default value is 1.
		randoms_suf : str, optional
			Suffix used to denote the datasets that have been created using the randoms. Default value is "_randoms".

		Returns
		-------

		"""
		if corr_type[0] == "both":
			data = [corr_type[1] + "_g_plus", corr_type[1] + "_gg"]
		elif corr_type[0] == "g+":
			data = [corr_type[1] + "_g_plus"]
		elif corr_type[0] == "gg":
			data = [corr_type[1] + "_gg"]
		else:
			raise KeyError("Unknown value for corr_type. Choose from [g+, gg, both]")
		figname_dataset_name = dataset_name
		if "/" in dataset_name:
			figname_dataset_name = figname_dataset_name.replace("/", "_")
		if "." in dataset_name:
			figname_dataset_name = figname_dataset_name.replace(".", "p")
		num_patches = max_patch - min_patch + 1
		print(
			f"Calculating jackknife errors for {num_patches} patches for {dataset_name} with {dataset_name}{randoms_suf} as randoms.")

		covs, stds = [], []
		for d in np.arange(0, len(data)):
			for b in np.arange(min_patch, max_patch + 1):
				self._obs_estimator(corr_type, IA_estimator, f"{dataset_name}_{b}",
									f"{dataset_name}{randoms_suf}_{b}", self.num_samples[f"{b}"],
									jk_group_name=f"{dataset_name}_jk{num_patches}")
				if "w" in data[d]:
					self._measure_w_g_i(corr_type=corr_type[0], dataset_name=f"{dataset_name}_{b}",
										jk_group_name=f"{dataset_name}_jk{num_patches}")
				else:
					self._measure_multipoles(corr_type=corr_type[0], dataset_name=f"{dataset_name}_{b}",
											 jk_group_name=f"{dataset_name}_jk{num_patches}")

			data_file = h5py.File(self.output_file_name, "a")
			group_multipoles = data_file[f"{self.snap_group}/{data[d]}/{dataset_name}_jk{num_patches}"]
			# calculating mean of the datavectors
			mean_multipoles = np.zeros(self.num_bins_r)
			for b in np.arange(min_patch, max_patch + 1):
				mean_multipoles += group_multipoles[f"{dataset_name}_{b}"][:]
			mean_multipoles /= num_patches

			# calculating the covariance matrix (multipoles) and the standard deviation (sqrt of diag of cov)
			cov = np.zeros((self.num_bins_r, self.num_bins_r))
			std = np.zeros(self.num_bins_r)
			for b in np.arange(min_patch, max_patch + 1):
				correlation = group_multipoles[f"{dataset_name}_{b}"][:]
				std += (correlation - mean_multipoles) ** 2
				for i in np.arange(self.num_bins_r):
					cov[:, i] += (correlation - mean_multipoles) * (correlation[i] - mean_multipoles[i])

			std *= (num_patches - 1) / num_patches  # see Singh 2023
			std = np.sqrt(std)  # size of errorbars
			cov *= (num_patches - 1) / num_patches  # cov not sqrt so to get std, sqrt of diag would need to be taken
			data_file.close()
			if self.output_file_name != None:
				output_file = h5py.File(self.output_file_name, "a")
				group_multipoles = create_group_hdf5(output_file, f"{self.snap_group}/" + data[d])
				write_dataset_hdf5(group_multipoles, dataset_name + "_mean_" + str(num_patches), data=mean_multipoles)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_" + str(num_patches), data=std)
				write_dataset_hdf5(group_multipoles, dataset_name + "_jackknife_cov_" + str(num_patches), data=cov)
				output_file.close()
			else:
				covs.append(cov)
				stds.append(std)
		if self.output_file_name != None:
			return
		else:
			return covs, stds

	def _measure_jackknife_realisations_obs_multiprocessing(
			self, patches_pos, patches_shape, corr_type, dataset_name, masks=None,
			rp_cut=None, over_h=False, num_nodes=4, cosmology=None, count_pairs=False, data_suffix="",
			num_sample_names=["S", "D"]
	):
		"""Measures the jackknife realisations for the projected correlation functions in MeasureIALightcone using the
		jackknife method. The area is already divided into patches; the correlation function is calculated omitting one
		patch at a time. This method uses >1 CPU.

		Parameters
		----------
		patches_pos : ndarray
			Array with the patch numbers of each object in the position sample.
		patches_shape : ndarray
			Array with the patch numbers of each object in the shape sample.
		dataset_name : str
			Name of the dataset in the output file.
		corr_type : iterable with 2 str entries
			Array with two entries. For the first entry, choose from [gg, g+, both]; for the second, from [w, multipoles].
		masks : dict or NoneType, optional
			See MeasureIALightcone methods. Default is None.
		rp_cut : float or NoneType, optional
			See MeasureIALightcone.measure_xi_multipoles. Default is None.
		over_h : bool, optional
			See MeasureIALightcone. Default value is False.
		num_nodes : int, optional
			Number of cores to be used in multiprocessing. Default is 4.
		cosmology : pyccl cosmology object or NoneType, optional
			See MeasureIALightcone. Default is None.
		count_pairs : bool, optional
			If True, only gg is measured, not g+. Default value is False.
		data_suffix : str, optional
			Addition to dataset name. Used to distinguish between DR, DD and RR measurements. Default value is "".
		num_sample_names : list with two entries, optional
			Keywords of the num_samples dictionary to access the number of objects in the position and shape samples.
			Default value is ["S", "D"].


		Returns
		-------

		"""
		if num_nodes == 1:
			self._measure_jackknife_realisations_obs(patches_pos, patches_shape, corr_type, dataset_name, masks,
													 rp_cut, over_h, cosmology, count_pairs, data_suffix,
													 num_sample_names)
			return

		if count_pairs == False:
			corr_type[0] = "both"
		if corr_type[0] == "both":
			data = [corr_type[1] + "_g_plus", corr_type[1] + "_gg"]
			corr_type_suff = ["_g_plus", "_gg"]
			xi_suff = ["SplusD", "DD"]
		elif corr_type[0] == "g+":
			data = [corr_type[1] + "_g_plus"]
			corr_type_suff = ["_g_plus"]
			xi_suff = ["SplusD"]
		elif corr_type[0] == "gg":
			data = [corr_type[1] + "_gg"]
			corr_type_suff = ["_gg"]
			xi_suff = ["DD"]
		else:
			raise KeyError("Unknown value for first entry of corr_type. Choose from [g+, gg, both]")
		if corr_type[1] == "multipoles":
			bin_var_names = ["r", "mu_r"]
		elif corr_type[1] == "w":
			bin_var_names = ["rp", "pi"]
		else:
			raise KeyError("Unknown value for second entry of corr_type. Choose from [multipoles, w_g_plus]")
		min_patch, max_patch = int(min(patches_pos)), int(max(patches_pos))
		num_patches = max_patch - min_patch + 1
		if min(patches_shape) != min_patch:
			print(
				"Warning! Minimum patch number of shape sample is not equal to minimum patch number of position sample.")
		if max(patches_shape) != max_patch:
			print(
				"Warning! Maximum patch number of shape sample is not equal to maximum patch number of position sample.")
		args_xi_g_plus, args_multipoles, tree_args = [], [], []
		for i in np.arange(min_patch, max_patch + 1):
			mask_position = (patches_pos != i)
			mask_shape = (patches_shape != i)
			if masks != None:
				mask_position = mask_position * masks["Redshift"]
				mask_shape = mask_shape * masks["Redshift_shape_sample"]
			self.num_samples[f"{i}"][num_sample_names[0]] = sum(mask_shape)
			self.num_samples[f"{i}"][num_sample_names[1]] = sum(mask_position)
			masks_total = {
				"Redshift": mask_position,
				"Redshift_shape_sample": mask_shape,
				"RA": mask_position,
				"RA_shape_sample": mask_shape,
				"DEC": mask_position,
				"DEC_shape_sample": mask_shape,
				"e1": mask_shape,
				"e2": mask_shape,
				"weight": mask_position,
				"weight_shape_sample": mask_shape,
			}
			if corr_type[1] == "multipoles":
				args_xi_g_plus.append(
					(
						dataset_name + "_" + str(i),
						masks_total,
						True,
						False,
						over_h,
						cosmology,
						rp_cut,
						data_suffix
					)
				)
			else:
				args_xi_g_plus.append(
					(
						dataset_name + "_" + str(i),
						masks_total,
						True,
						False,
						over_h,
						cosmology,
						data_suffix
					)
				)

		args_xi_g_plus = np.array(args_xi_g_plus)
		multiproc_chuncks = np.array_split(np.arange(num_patches), np.ceil(num_patches / num_nodes))
		for chunck in multiproc_chuncks:
			chunck = np.array(chunck, dtype=int)
			if corr_type[1] == "multipoles":
				if count_pairs:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._count_pairs_xi_r_mur_obs_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
						args_xi_g_plus[chunck][:, 6],
						args_xi_g_plus[chunck][:, 7],
					)
				else:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_r_mur_obs_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
						args_xi_g_plus[chunck][:, 6],
					)
			else:
				if count_pairs:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._count_pairs_xi_rp_pi_obs_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
						args_xi_g_plus[chunck][:, 6],
					)
				else:
					result = ProcessingPool(nodes=len(chunck)).map(
						self._measure_xi_rp_pi_obs_brute,
						args_xi_g_plus[chunck][:, 0],
						args_xi_g_plus[chunck][:, 1],
						args_xi_g_plus[chunck][:, 2],
						args_xi_g_plus[chunck][:, 3],
						args_xi_g_plus[chunck][:, 4],
						args_xi_g_plus[chunck][:, 5],
					)

			output_file = h5py.File(self.output_file_name, "a")
			if count_pairs:
				for i in np.arange(0, len(chunck)):
					for j, data_j in enumerate(data):
						group_xigplus = create_group_hdf5(
							output_file, f"{self.snap_group}/{corr_type[1]}/xi_gg/{dataset_name}_jk{num_patches}"
						)
						write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}{data_suffix}",
										   data=result[i][j])
						write_dataset_hdf5(
							group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}_{bin_var_names[0]}",
							data=result[i][1]
						)
						write_dataset_hdf5(
							group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}_{bin_var_names[1]}",
							data=result[i][2]
						)
			else:
				for i in np.arange(0, len(chunck)):
					for j, data_j in enumerate(data):
						group_xigplus = create_group_hdf5(
							output_file,
							f"{self.snap_group}/{corr_type[1]}/xi{corr_type_suff[j]}/{dataset_name}_jk{num_patches}"
						)
						write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}_{xi_suff[j]}",
										   data=result[i][j])
						write_dataset_hdf5(
							group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}_{bin_var_names[0]}",
							data=result[i][2]
						)
						write_dataset_hdf5(
							group_xigplus, f"{dataset_name}_{chunck[i] + min_patch}_{bin_var_names[1]}",
							data=result[i][3]
						)
			# write_dataset_hdf5(group_xigplus, f"{dataset_name}_{chunck[i]}_sigmasq", data=result[i][3])
			output_file.close()

		# for i in np.arange(0, num_patches):
		# 	if corr_type[1] == "multipoles":
		# 		self.measure_multipoles(corr_type=args_multipoles[i, 0], dataset_name=args_multipoles[i, 1])
		# 	else:
		# 		self.measure_w_g_i(corr_type=args_multipoles[i, 0], dataset_name=args_multipoles[i, 1])
		return

	def measure_covariance_multiple_datasets(self, corr_type, dataset_names, num_box=27, return_output=False):
		"""Combines the jackknife measurements for different datasets into one covariance matrix.
		Author: Marta Garcia Escobar (starting from measure_jackknife methods); updated

		Parameters
		----------
		corr_type : str
			Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
		dataset_names : list of str
			List of the dataset names. If there is only one value, it calculates the covariance matrix with itself.
		num_box : int, optional
			Number of jackknife realisations. Default value is 27.
		return_output : bool, optional
			If True, the output will be returned instead of written to a file. Default value is False.

		Returns
		-------
		ndarray, ndarray
			covariance, standard deviation

		"""
		# check if corr_type is valid
		valid_corr_types = ["w_g_plus", "multipoles_g_plus", "w_gg", "multipoles_gg"]
		if corr_type not in valid_corr_types:
			raise ValueError("corr_type must be 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.")

		data_file = h5py.File(self.output_file_name, "a")

		mean_list = []  # list of arrays

		for dataset_name in dataset_names:
			group = data_file[f"{self.snap_group}/{corr_type}/{dataset_name}_jk{num_box}"]
			mean_multipoles = np.zeros(self.num_bins_r)
			for b in np.arange(0, num_box):
				mean_multipoles += group[dataset_name + "_" + str(b)]
			mean_multipoles /= num_box
			mean_list.append(mean_multipoles)

		# calculating the covariance matrix and the standard deviation (sqrt of diag of cov)
		cov = np.zeros((self.num_bins_r, self.num_bins_r))
		std = np.zeros(self.num_bins_r)

		if len(dataset_names) == 1:  # covariance with itself
			dataset_name = dataset_names[0]
			group = data_file[f"{self.snap_group}/{corr_type}/{dataset_name}_jk{num_box}"]
			for b in np.arange(0, num_box):
				std += (group[dataset_name + "_" + str(b)] - mean_list[0]) ** 2
				for i in np.arange(self.num_bins_r):
					cov[:, i] += (group[dataset_name + "_" + str(b)] - mean_list[0]) * (
							group[dataset_name + "_" + str(b)][i] - mean_list[0][i]
					)
		elif len(dataset_names) == 2:
			group0 = data_file[f"{self.snap_group}/{corr_type}/{dataset_names[0]}_jk{num_box}"]
			group1 = data_file[f"{self.snap_group}/{corr_type}/{dataset_names[1]}_jk{num_box}"]
			for b in np.arange(0, num_box):
				std += (group0[dataset_names[0] + "_" + str(b)] - mean_list[0]) * (
						group1[dataset_names[1] + "_" + str(b)] - mean_list[1])
				for i in np.arange(self.num_bins_r):
					cov[:, i] += (group0[dataset_names[0] + "_" + str(b)] - mean_list[0]) * (
							group1[dataset_names[1] + "_" + str(b)][i] - mean_list[1][i]
					)
		else:
			raise KeyError("Too many datasets given, choose either 1 or 2")

		std *= (num_box - 1) / num_box  # see Singh 2023
		std = np.sqrt(std)  # size of errorbars
		cov *= (num_box - 1) / num_box  # cov not sqrt so to get std, sqrt of diag would need to be taken

		data_file.close()

		if (self.output_file_name != None) and (return_output == False):
			output_file = h5py.File(self.output_file_name, "a")
			group = create_group_hdf5(output_file, f"{self.snap_group}/{corr_type}")
			if len(dataset_names) == 2:
				write_dataset_hdf5(group, dataset_names[0] + "_" + dataset_names[1] + "_jackknife_cov_" + str(
					num_box), data=cov)
				write_dataset_hdf5(group,
								   dataset_names[0] + "_" + dataset_names[1] + "_jackknife_" + str(num_box),
								   data=std)

			else:
				write_dataset_hdf5(group, dataset_names[0] + "_jackknife_cov_" + str(num_box), data=cov)
				write_dataset_hdf5(group, dataset_names[0] + "_jackknife_" + str(num_box), data=std)
			output_file.close()
			return
		else:
			return cov, std

	def create_full_cov_matrix_projections(self, corr_type, dataset_names=["LOS_x", "LOS_y", "LOS_z"], num_box=27,
										   return_output=False):
		"""Function that creates the full covariance matrix for all 3 projections and combined covariance for 2
		projections by combining previously obtained jackknife information. Generalised from Marta Garcia Escobar's code.

		Parameters
		----------
		corr_type : str
			Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
		num_box : int, optional
			Number of jackknife realisations. Default value is 27.
		dataset_names : list of str
			Dataset names of projections to be combined. Default value is ["LOS_x","LOS_y","LOS_z"].
		return_output : bool, optional
			If True, the output will be returned instead of written to a file. Default value is False.

		Returns
		-------
		ndarrays
			covariance for 3 projections, covariance for x and y, covariance for x and z, covariance for y and z

		"""
		self.measure_covariance_multiple_datasets(corr_type=corr_type,
												  dataset_names=[dataset_names[0], dataset_names[1]], num_box=num_box)
		self.measure_covariance_multiple_datasets(corr_type=corr_type,
												  dataset_names=[dataset_names[0], dataset_names[2]], num_box=num_box)
		self.measure_covariance_multiple_datasets(corr_type=corr_type,
												  dataset_names=[dataset_names[1], dataset_names[2]], num_box=num_box)

		# import needed datasets
		output_file = h5py.File(self.output_file_name, "a")
		group = output_file[f"{self.snap_group}/{corr_type}"]

		# cov matrix between datasets
		cov_xx = group[f'{dataset_names[0]}_jackknife_cov_{num_box}'][:]
		cov_yy = group[f'{dataset_names[1]}_jackknife_cov_{num_box}'][:]
		cov_zz = group[f'{dataset_names[2]}_jackknife_cov_{num_box}'][:]
		cov_xy = group[f'{dataset_names[0]}_{dataset_names[1]}_jackknife_cov_{num_box}'][:]
		cov_xz = group[f'{dataset_names[0]}_{dataset_names[2]}_jackknife_cov_{num_box}'][:]
		cov_yz = group[f'{dataset_names[1]}_{dataset_names[2]}_jackknife_cov_{num_box}'][:]

		# 3 projections
		cov_top = np.concatenate((cov_xx, cov_xy, cov_xz), axis=1)
		cov_middle = np.concatenate((cov_xy.T, cov_yy, cov_yz), axis=1)  # cov_xy.T = cov_yx
		cov_bottom = np.concatenate((cov_xz.T, cov_yz.T, cov_zz), axis=1)
		cov3 = np.concatenate((cov_top, cov_middle, cov_bottom), axis=0)

		# all 2 projections
		cov_top = np.concatenate((cov_xx, cov_xy), axis=1)
		cov_middle = np.concatenate((cov_xy.T, cov_yy), axis=1)  # cov_xy.T = cov_yx
		cov2xy = np.concatenate((cov_top, cov_middle), axis=0)

		cov_top = np.concatenate((cov_xx, cov_xz), axis=1)
		cov_middle = np.concatenate((cov_xz.T, cov_zz), axis=1)  # cov_xz.T = cov_zx
		cov2xz = np.concatenate((cov_top, cov_middle), axis=0)

		cov_top = np.concatenate((cov_yy, cov_yz), axis=1)
		cov_middle = np.concatenate((cov_yz.T, cov_zz), axis=1)  # cov_yz.T = cov_zy
		cov2yz = np.concatenate((cov_top, cov_middle), axis=0)

		if return_output:
			return cov3, cov2xy, cov2xz, cov2yz
		else:
			write_dataset_hdf5(group,
							   f"{dataset_names[0]}_{dataset_names[1]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}",
							   data=cov3)
			write_dataset_hdf5(group,
							   f'{dataset_names[0]}_{dataset_names[1]}_combined_jackknife_cov_{num_box}',
							   data=cov2xy)
			write_dataset_hdf5(group,
							   f'{dataset_names[0]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}',
							   data=cov2xz)
			write_dataset_hdf5(group,
							   f'{dataset_names[1]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}',
							   data=cov2yz)
			return

__init__(data, output_file_name, simulation=None, snapshot=None, separation_limits=[0.1, 20.0], num_bins_r=8, num_bins_pi=20, pi_max=None, boxsize=None, periodicity=True)

The __init__ method of the MeasureJackknife class.

Notes

Constructor parameters 'data', 'output_file_name', 'simulation', 'snapshot', 'separation_limits', 'num_bins_r', 'num_bins_pi', 'pi_max', 'boxsize' and 'periodicity' are passed to MeasureIABase.

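A minimal instantiation sketch for a periodic simulation box. The catalogue keys below mirror those accessed elsewhere in this class ("Position", "Position_shape_sample", "Axis_Direction", "q"); the array shapes and random values are purely illustrative:

import numpy as np
from measureia import MeasureJackknife

rng = np.random.default_rng(42)
n_pos, n_shape, boxsize = 10_000, 2_000, 100.0
data = {
	"Position": rng.uniform(0.0, boxsize, size=(n_pos, 3)),
	"Position_shape_sample": rng.uniform(0.0, boxsize, size=(n_shape, 3)),
	"Axis_Direction": rng.normal(size=(n_shape, 3)),  # illustrative shape axes
	"q": rng.uniform(0.2, 1.0, size=n_shape),  # illustrative axis ratios
}
jk = MeasureJackknife(data, "correlations.hdf5", boxsize=boxsize, periodicity=True)
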
Source code in src/measureia/measure_jackknife.py
def __init__(
		self,
		data,
		output_file_name,
		simulation=None,
		snapshot=None,
		separation_limits=[0.1, 20.0],
		num_bins_r=8,
		num_bins_pi=20,
		pi_max=None,
		boxsize=None,
		periodicity=True,
):
	"""
	The __init__ method of the MeasureJackknife class.

	Notes
	-----
	Constructor parameters 'data', 'output_file_name', 'simulation', 'snapshot', 'separation_limits', 'num_bins_r',
	'num_bins_pi', 'pi_max', 'boxsize' and 'periodicity' are passed to MeasureIABase.

	"""
	super().__init__(data, output_file_name, simulation, snapshot, separation_limits, num_bins_r, num_bins_pi,
					 pi_max, boxsize, periodicity)
	return

measure_covariance_multiple_datasets(corr_type, dataset_names, num_box=27, return_output=False)

Combines the jackknife measurements for different datasets into one covariance matrix. Author: Marta Garcia Escobar (starting from measure_jackknife methods); updated

Parameters:
  • corr_type (str) –
    Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
    
  • dataset_names (list of str) –
    List of the dataset names. If there is only one value, it calculates the covariance matrix with itself.
    
  • num_box (int, default: 27) –
    Number of jackknife realisations. Default value is 27.
    
  • return_output (bool, default: False) –
    If True, the output will be returned instead of written to a file. Default value is False.
    
Returns:
  • (ndarray, ndarray)

    covariance, standard deviation

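For example, to build the cross covariance of two previously measured jackknife runs (reusing the jk instance from the __init__ sketch above; "LOS_x" and "LOS_y" are placeholders for datasets whose num_box realisations already exist in the output file):

cov, std = jk.measure_covariance_multiple_datasets(
	corr_type="w_g_plus",
	dataset_names=["LOS_x", "LOS_y"],
	num_box=27,
	return_output=True,  # return the arrays instead of writing them to the file
)
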
Source code in src/measureia/measure_jackknife.py
def measure_covariance_multiple_datasets(self, corr_type, dataset_names, num_box=27, return_output=False):
	"""Combines the jackknife measurements for different datasets into one covariance matrix.
	Author: Marta Garcia Escobar (starting from measure_jackknife methods); updated

	Parameters
	----------
	corr_type : str
		Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
	dataset_names : list of str
		List of the dataset names. If there is only one value, it calculates the covariance matrix with itself.
	num_box : int, optional
		Number of jackknife realisations. Default value is 27.
	return_output : bool, optional
		If True, the output will be returned instead of written to a file. Default value is False.

	Returns
	-------
	ndarray, ndarray
		covariance, standard deviation

	"""
	# check if corr_type is valid
	valid_corr_types = ["w_g_plus", "multipoles_g_plus", "w_gg", "multipoles_gg"]
	if corr_type not in valid_corr_types:
		raise ValueError("corr_type must be 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.")

	data_file = h5py.File(self.output_file_name, "a")

	mean_list = []  # list of arrays

	for dataset_name in dataset_names:
		group = data_file[f"{self.snap_group}/{corr_type}/{dataset_name}_jk{num_box}"]
		mean_multipoles = np.zeros(self.num_bins_r)
		for b in np.arange(0, num_box):
			mean_multipoles += group[dataset_name + "_" + str(b)]
		mean_multipoles /= num_box
		mean_list.append(mean_multipoles)

	# calculating the covariance matrix and the standard deviation (sqrt of diag of cov)
	cov = np.zeros((self.num_bins_r, self.num_bins_r))
	std = np.zeros(self.num_bins_r)

	if len(dataset_names) == 1:  # covariance with itself
		dataset_name = dataset_names[0]
		group = data_file[f"{self.snap_group}/{corr_type}/{dataset_name}_jk{num_box}"]
		for b in np.arange(0, num_box):
			std += (group[dataset_name + "_" + str(b)] - mean_list[0]) ** 2
			for i in np.arange(self.num_bins_r):
				cov[:, i] += (group[dataset_name + "_" + str(b)] - mean_list[0]) * (
						group[dataset_name + "_" + str(b)][i] - mean_list[0][i]
				)
	elif len(dataset_names) == 2:
		group0 = data_file[f"{self.snap_group}/{corr_type}/{dataset_names[0]}_jk{num_box}"]
		group1 = data_file[f"{self.snap_group}/{corr_type}/{dataset_names[1]}_jk{num_box}"]
		for b in np.arange(0, num_box):
			std += (group0[dataset_names[0] + "_" + str(b)] - mean_list[0]) * (
					group1[dataset_names[1] + "_" + str(b)] - mean_list[1])
			for i in np.arange(self.num_bins_r):
				cov[:, i] += (group0[dataset_names[0] + "_" + str(b)] - mean_list[0]) * (
						group1[dataset_names[1] + "_" + str(b)][i] - mean_list[1][i]
				)
	else:
		raise KeyError("Too many datasets given, choose either 1 or 2")

	std *= (num_box - 1) / num_box  # see Singh 2023
	std = np.sqrt(std)  # size of errorbars
	cov *= (num_box - 1) / num_box  # cov not sqrt so to get std, sqrt of diag would need to be taken

	data_file.close()

	if (self.output_file_name != None) and (return_output == False):
		output_file = h5py.File(self.output_file_name, "a")
		group = create_group_hdf5(output_file, f"{self.snap_group}/{corr_type}")
		if len(dataset_names) == 2:
			write_dataset_hdf5(group, dataset_names[0] + "_" + dataset_names[1] + "_jackknife_cov_" + str(
				num_box), data=cov)
			write_dataset_hdf5(group,
							   dataset_names[0] + "_" + dataset_names[1] + "_jackknife_" + str(num_box),
							   data=std)

		else:
			write_dataset_hdf5(group, dataset_names[0] + "_jackknife_cov_" + str(num_box), data=cov)
			write_dataset_hdf5(group, dataset_names[0] + "_jackknife_" + str(num_box), data=std)
		output_file.close()
		return
	else:
		return cov, std

create_full_cov_matrix_projections(corr_type, dataset_names=['LOS_x', 'LOS_y', 'LOS_z'], num_box=27, return_output=False)

Function that creates the full covariance matrix for all 3 projections and combined covariance for 2 projections by combining previously obtained jackknife information. Generalised from Marta Garcia Escobar's code.

Parameters:
  • corr_type (str) –
    Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
    
  • num_box (int, default: 27) –
    Number of jackknife realisations. Default value is 27.
    
  • dataset_names (list of str, default: ['LOS_x', 'LOS_y', 'LOS_z']) –
    Dataset names of projections to be combined. Default value is ["LOS_x","LOS_y","LOS_z"].
    
  • return_output (bool, default: False) –
    If True, the output will be returned instead of written to a file. Default value is False.
    
Returns:
  • ndarrays

    covariance for 3 projections, covariance for x and y, covariance for x and z, covariance for y and z

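A usage sketch, assuming the per-projection jackknife realisations and auto covariances (e.g. "LOS_x_jackknife_cov_27") already exist in the output file; with num_bins_r = 8 per projection, cov3 below is 24x24 and the pairwise matrices are 16x16:

cov3, cov2xy, cov2xz, cov2yz = jk.create_full_cov_matrix_projections(
	corr_type="w_g_plus",
	dataset_names=["LOS_x", "LOS_y", "LOS_z"],
	num_box=27,
	return_output=True,
)
# cov3 block layout: [[C_xx, C_xy, C_xz], [C_yx, C_yy, C_yz], [C_zx, C_zy, C_zz]]
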
Source code in src/measureia/measure_jackknife.py
def create_full_cov_matrix_projections(self, corr_type, dataset_names=["LOS_x", "LOS_y", "LOS_z"], num_box=27,
									   return_output=False):
	"""Function that creates the full covariance matrix for all 3 projections and combined covariance for 2
	projections by combining previously obtained jackknife information. Generalised from Marta Garcia Escobar's code.

	Parameters
	----------
	corr_type : str
		Which type of correlation is measured. Takes 'w_g_plus', 'w_gg', 'multipoles_g_plus' or 'multipoles_gg'.
	num_box : int, optional
		Number of jackknife realisations. Default value is 27.
	dataset_names : list of str
		Dataset names of projections to be combined. Default value is ["LOS_x","LOS_y","LOS_z"].
	return_output : bool, optional
		If True, the output will be returned instead of written to a file. Default value is False.

	Returns
	-------
	ndarrays
		covariance for 3 projections, covariance for x and y, covariance for x and z, covariance for y and z

	"""
	self.measure_covariance_multiple_datasets(corr_type=corr_type,
											  dataset_names=[dataset_names[0], dataset_names[1]], num_box=num_box)
	self.measure_covariance_multiple_datasets(corr_type=corr_type,
											  dataset_names=[dataset_names[0], dataset_names[2]], num_box=num_box)
	self.measure_covariance_multiple_datasets(corr_type=corr_type,
											  dataset_names=[dataset_names[1], dataset_names[2]], num_box=num_box)

	# import needed datasets
	output_file = h5py.File(self.output_file_name, "a")
	group = output_file[f"{self.snap_group}/{corr_type}"]

	# cov matrix between datasets
	cov_xx = group[f'{dataset_names[0]}_jackknife_cov_{num_box}'][:]
	cov_yy = group[f'{dataset_names[1]}_jackknife_cov_{num_box}'][:]
	cov_zz = group[f'{dataset_names[2]}_jackknife_cov_{num_box}'][:]
	cov_xy = group[f'{dataset_names[0]}_{dataset_names[1]}_jackknife_cov_{num_box}'][:]
	cov_xz = group[f'{dataset_names[0]}_{dataset_names[2]}_jackknife_cov_{num_box}'][:]
	cov_yz = group[f'{dataset_names[1]}_{dataset_names[2]}_jackknife_cov_{num_box}'][:]

	# 3 projections
	cov_top = np.concatenate((cov_xx, cov_xy, cov_xz), axis=1)
	cov_middle = np.concatenate((cov_xy.T, cov_yy, cov_yz), axis=1)  # cov_xy.T = cov_yx
	cov_bottom = np.concatenate((cov_xz.T, cov_yz.T, cov_zz), axis=1)
	cov3 = np.concatenate((cov_top, cov_middle, cov_bottom), axis=0)

	# all 2 projections
	cov_top = np.concatenate((cov_xx, cov_xy), axis=1)
	cov_middle = np.concatenate((cov_xy.T, cov_yy), axis=1)  # cov_xy.T = cov_yx
	cov2xy = np.concatenate((cov_top, cov_middle), axis=0)

	cov_top = np.concatenate((cov_xx, cov_xz), axis=1)
	cov_middle = np.concatenate((cov_xz.T, cov_zz), axis=1)  # cov_xz.T = cov_zx
	cov2xz = np.concatenate((cov_top, cov_middle), axis=0)

	cov_top = np.concatenate((cov_yy, cov_yz), axis=1)
	cov_middle = np.concatenate((cov_yz.T, cov_zz), axis=1)  # cov_yz.T = cov_zy
	cov2yz = np.concatenate((cov_top, cov_middle), axis=0)

	if return_output:
		return cov3, cov2xy, cov2xz, cov2yz
	else:
		write_dataset_hdf5(group,
						   f"{dataset_names[0]}_{dataset_names[1]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}",
						   data=cov3)
		write_dataset_hdf5(group,
						   f'{dataset_names[0]}_{dataset_names[1]}_combined_jackknife_cov_{num_box}',
						   data=cov2xy)
		write_dataset_hdf5(group,
						   f'{dataset_names[0]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}',
						   data=cov2xz)
		write_dataset_hdf5(group,
						   f'{dataset_names[1]}_{dataset_names[2]}_combined_jackknife_cov_{num_box}',
						   data=cov2yz)
		return