Performance Analysis - Cython
Number of effective sequences implemented in Cython
• Donatas Repečka • 12 min read
In the previous post I have compared various languages and libraries in terms of their speed. This notebook contains the code used in the comparison as well as some details about the choices made to improve the performance of Cython implementation.
# ! pip install pandas
# ! pip install Cython
import pandas as pd
def get_data(path):
fasta_df = pd.read_csv(path, sep="\n", lineterminator=">", index_col=False, names=['id', 'seq'])
return fasta_df.seq.to_numpy(dtype=str)
seqs = get_data('../data/picked_msa.fasta')
Just to remind the pseudo code looks like this:
for seq1 in seqs:
for seq2 in seqs:
if count_mathes(seq1, seq2) > threshold:
weight +=1
meff += 1/weight
meff = meff/(len(seq1)^0.5)
Cython implementation is quite simple, it exploits the symmetry to reduce calculations required, but does not require any other tricks to have decent performance. The overall performance is somewhere between Numpy and Numba.
%load_ext Cython
%%cython --annotate
import numpy as np
cimport numpy as cnp
def get_nf_cython(seqs, threshold=0.8):
cdef cnp.ndarray is_same_cluster
seqs = seqs.view(np.uint32).reshape(seqs.shape[0], -1)
n_seqs, seq_len = seqs.shape
is_same_cluster = np.ones([n_seqs, n_seqs],np.bool_)
for i in range(n_seqs):
current = seqs[i:]
pairwise_id = np.equal(current[1:], current[0].T).mean(1)
is_more = pairwise_id > threshold
is_same_cluster[i, i+1:] = is_more
is_same_cluster[i+1:, i] = is_more
meff = 1.0/is_same_cluster.sum(1)
return meff.sum()/(seq_len**0.5)
Generated by Cython 0.29.23
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.
01:
+02: import numpy as np
__pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* … */ __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
03: cimport numpy as cnp
04:
+05: def get_nf_cython(seqs, threshold=0.8):
/* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_1get_nf_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyMethodDef __pyx_mdef_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_1get_nf_cython = {"get_nf_cython", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_1get_nf_cython, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_1get_nf_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_seqs = 0; PyObject *__pyx_v_threshold = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("get_nf_cython (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_seqs,&__pyx_n_s_threshold,0}; PyObject* values[2] = {0,0}; values[1] = ((PyObject *)__pyx_float_0_8); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_seqs)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; CYTHON_FALLTHROUGH; case 1: if (kw_args > 0) { PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_threshold); if (value) { values[1] = value; kw_args--; } } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "get_nf_cython") < 0)) __PYX_ERR(0, 5, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); break; default: goto __pyx_L5_argtuple_error; } } __pyx_v_seqs = values[0]; __pyx_v_threshold = values[1]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("get_nf_cython", 0, 1, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 5, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_ee6b92b0179f5a65348a99bc0555535b.get_nf_cython", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_get_nf_cython(__pyx_self, __pyx_v_seqs, __pyx_v_threshold); int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_get_nf_cython(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_seqs, PyObject *__pyx_v_threshold) { PyArrayObject *__pyx_v_is_same_cluster = 0; PyObject *__pyx_v_n_seqs = NULL; PyObject *__pyx_v_seq_len = NULL; CYTHON_UNUSED long __pyx_v_s; PyObject *__pyx_v_i = NULL; PyObject *__pyx_v_current = NULL; PyObject *__pyx_v_pairwise_id = NULL; PyObject *__pyx_v_is_more = NULL; PyObject *__pyx_v_meff = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("get_nf_cython", 0); __Pyx_INCREF(__pyx_v_seqs); /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_11); __Pyx_XDECREF(__pyx_t_12); __Pyx_AddTraceback("_cython_magic_ee6b92b0179f5a65348a99bc0555535b.get_nf_cython", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF((PyObject *)__pyx_v_is_same_cluster); __Pyx_XDECREF(__pyx_v_n_seqs); __Pyx_XDECREF(__pyx_v_seq_len); __Pyx_XDECREF(__pyx_v_i); __Pyx_XDECREF(__pyx_v_current); __Pyx_XDECREF(__pyx_v_pairwise_id); __Pyx_XDECREF(__pyx_v_is_more); __Pyx_XDECREF(__pyx_v_meff); __Pyx_XDECREF(__pyx_v_seqs); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_tuple__4 = PyTuple_Pack(11, __pyx_n_s_seqs, __pyx_n_s_threshold, __pyx_n_s_is_same_cluster, __pyx_n_s_n_seqs, __pyx_n_s_seq_len, __pyx_n_s_s, __pyx_n_s_i, __pyx_n_s_current, __pyx_n_s_pairwise_id, __pyx_n_s_is_more, __pyx_n_s_meff); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); /* … */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_ee6b92b0179f5a65348a99bc0555535b_1get_nf_cython, NULL, __pyx_n_s_cython_magic_ee6b92b0179f5a6534); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_get_nf_cython, __pyx_t_1) < 0) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
06:
07: cdef cnp.ndarray is_same_cluster
08:
+09: seqs = seqs.view(np.uint32).reshape(seqs.shape[0], -1)
__pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_seqs, __pyx_n_s_view); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_uint32); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) { __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_4)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); } } __pyx_t_2 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_4, __pyx_t_5) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_reshape); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_seqs, __pyx_n_s_shape); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_5 = __Pyx_GetItemInt(__pyx_t_2, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = NULL; __pyx_t_6 = 0; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) { __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_2)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); __pyx_t_6 = 1; } } #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[3] = {__pyx_t_2, __pyx_t_5, __pyx_int_neg_1}; __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } else #endif #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[3] = {__pyx_t_2, __pyx_t_5, __pyx_int_neg_1}; __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } else #endif { __pyx_t_4 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); if (__pyx_t_2) { __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_2); __pyx_t_2 = NULL; } __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_6, __pyx_t_5); __Pyx_INCREF(__pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_6, __pyx_int_neg_1); __pyx_t_5 = 0; __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_seqs, __pyx_t_1); __pyx_t_1 = 0;
+10: n_seqs, seq_len = seqs.shape
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_seqs, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) { PyObject* sequence = __pyx_t_1; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 10, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); } else { __pyx_t_3 = PyList_GET_ITEM(sequence, 0); __pyx_t_4 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); #else __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { Py_ssize_t index = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_7 = Py_TYPE(__pyx_t_5)->tp_iternext; index = 0; __pyx_t_3 = __pyx_t_7(__pyx_t_5); if (unlikely(!__pyx_t_3)) goto __pyx_L3_unpacking_failed; __Pyx_GOTREF(__pyx_t_3); index = 1; __pyx_t_4 = __pyx_t_7(__pyx_t_5); if (unlikely(!__pyx_t_4)) goto __pyx_L3_unpacking_failed; __Pyx_GOTREF(__pyx_t_4); if (__Pyx_IternextUnpackEndCheck(__pyx_t_7(__pyx_t_5), 2) < 0) __PYX_ERR(0, 10, __pyx_L1_error) __pyx_t_7 = NULL; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; goto __pyx_L4_unpacking_done; __pyx_L3_unpacking_failed:; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_7 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 10, __pyx_L1_error) __pyx_L4_unpacking_done:; } __pyx_v_n_seqs = __pyx_t_3; __pyx_t_3 = 0; __pyx_v_seq_len = __pyx_t_4; __pyx_t_4 = 0;
+11: is_same_cluster = np.ones([n_seqs, n_seqs],np.bool_)
__Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_ones); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = PyList_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_v_n_seqs); __Pyx_GIVEREF(__pyx_v_n_seqs); PyList_SET_ITEM(__pyx_t_4, 0, __pyx_v_n_seqs); __Pyx_INCREF(__pyx_v_n_seqs); __Pyx_GIVEREF(__pyx_v_n_seqs); PyList_SET_ITEM(__pyx_t_4, 1, __pyx_v_n_seqs); __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_np); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_bool); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = NULL; __pyx_t_6 = 0; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_5)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); __pyx_t_6 = 1; } } #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_4, __pyx_t_2}; __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } else #endif #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_4, __pyx_t_2}; __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } else #endif { __pyx_t_8 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); if (__pyx_t_5) { __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL; } __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_8, 0+__pyx_t_6, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_6, __pyx_t_2); __pyx_t_4 = 0; __pyx_t_2 = 0; __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_8, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 11, __pyx_L1_error) __pyx_v_is_same_cluster = ((PyArrayObject *)__pyx_t_1); __pyx_t_1 = 0;
12:
+13: s = 0
__pyx_v_s = 0;
+14: for i in range(n_seqs):
__pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_v_n_seqs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_9 = 0; __pyx_t_10 = NULL; } else { __pyx_t_9 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 14, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_9 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_9); __Pyx_INCREF(__pyx_t_1); __pyx_t_9++; if (unlikely(0 < 0)) __PYX_ERR(0, 14, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_9); __pyx_t_9++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_9 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_9); __Pyx_INCREF(__pyx_t_1); __pyx_t_9++; if (unlikely(0 < 0)) __PYX_ERR(0, 14, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_9); __pyx_t_9++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 14, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_1); __pyx_t_1 = 0; /* … */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+15: current = seqs[i:]
__pyx_t_1 = __Pyx_PyObject_GetSlice(__pyx_v_seqs, 0, 0, &__pyx_v_i, NULL, NULL, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF_SET(__pyx_v_current, __pyx_t_1); __pyx_t_1 = 0;
+16: pairwise_id = np.equal(current[1:], current[0].T).mean(1)
__Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_equal); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = __Pyx_PyObject_GetSlice(__pyx_v_current, 1, 0, NULL, NULL, &__pyx_slice_, 1, 0, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_current, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_T); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = NULL; __pyx_t_6 = 0; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4); if (likely(__pyx_t_5)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_4, function); __pyx_t_6 = 1; } } #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_4)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_2, __pyx_t_11}; __pyx_t_8 = __Pyx_PyFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; } else #endif #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_4)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_2, __pyx_t_11}; __pyx_t_8 = __Pyx_PyCFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; } else #endif { __pyx_t_12 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); if (__pyx_t_5) { __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_5); __pyx_t_5 = NULL; } __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_12, 0+__pyx_t_6, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_11); PyTuple_SET_ITEM(__pyx_t_12, 1+__pyx_t_6, __pyx_t_11); __pyx_t_2 = 0; __pyx_t_11 = 0; __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_12, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_mean); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_t_8 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) { __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_4); if (likely(__pyx_t_8)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); __Pyx_INCREF(__pyx_t_8); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_4, function); } } __pyx_t_1 = (__pyx_t_8) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_8, __pyx_int_1) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_int_1); __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF_SET(__pyx_v_pairwise_id, __pyx_t_1); __pyx_t_1 = 0; /* … */ __pyx_slice_ = PySlice_New(__pyx_int_1, Py_None, Py_None); if (unlikely(!__pyx_slice_)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_slice_); __Pyx_GIVEREF(__pyx_slice_);
+17: is_more = pairwise_id > threshold
__pyx_t_1 = PyObject_RichCompare(__pyx_v_pairwise_id, __pyx_v_threshold, Py_GT); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 17, __pyx_L1_error) __Pyx_XDECREF_SET(__pyx_v_is_more, __pyx_t_1); __pyx_t_1 = 0;
+18: is_same_cluster[i, i+1:] = is_more
__pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = PySlice_New(__pyx_t_1, Py_None, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_i); __Pyx_GIVEREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_i); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_4); __pyx_t_4 = 0; if (unlikely(PyObject_SetItem(((PyObject *)__pyx_v_is_same_cluster), __pyx_t_1, __pyx_v_is_more) < 0)) __PYX_ERR(0, 18, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+19: is_same_cluster[i+1:, i] = is_more
__pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = PySlice_New(__pyx_t_1, Py_None, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); __Pyx_INCREF(__pyx_v_i); __Pyx_GIVEREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_i); __pyx_t_4 = 0; if (unlikely(PyObject_SetItem(((PyObject *)__pyx_v_is_same_cluster), __pyx_t_1, __pyx_v_is_more) < 0)) __PYX_ERR(0, 19, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+20: meff = 1.0/is_same_cluster.sum(1)
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_is_same_cluster), __pyx_n_s_sum); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_1); if (likely(__pyx_t_4)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_1, function); } } __pyx_t_3 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_4, __pyx_int_1) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_int_1); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyFloat_TrueDivideCObj(__pyx_float_1_0, __pyx_t_3, 1.0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_meff = __pyx_t_1; __pyx_t_1 = 0;
+21: return meff.sum()/(seq_len**0.5)
__Pyx_XDECREF(__pyx_r); __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_meff, __pyx_n_s_sum); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) { __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3); if (likely(__pyx_t_4)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_3, function); } } __pyx_t_1 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 21, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyNumber_Power(__pyx_v_seq_len, __pyx_float_0_5, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = __Pyx_PyNumber_Divide(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0;
%%timeit -n 3 -r 3
get_nf_cython(seqs[:100])
16.6 ms ± 1.57 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
get_nf_cython(seqs[:100])
0.18006706787628665