Midterm Review

import numpy as np
import pandas as pd
import altair as alt
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng()
A = rng.integers(0,10,size=(1,5))
A
array([[1, 6, 4, 5, 4]])
A = rng.integers(0,10,size=(5))
A
array([3, 3, 8, 6, 5])
A = rng.integers(0,10,size=(2,5))
A
array([[5, 9, 6, 0, 9],
       [4, 2, 1, 2, 8]])
A = rng.integers(0,10,size=(5,10))
# How many columns of A contain a 6?
# Step 1: compare elementwise; the result is a Boolean NumPy array marking where A equals 6.
B = (A == 6)
type(B)
numpy.ndarray
B.sum(axis=0).shape
(10,)
A
array([[8, 8, 8, 6, 5, 1, 3, 6, 1, 7],
       [8, 8, 9, 5, 1, 7, 5, 6, 4, 7],
       [2, 7, 1, 9, 5, 9, 8, 8, 6, 2],
       [8, 2, 4, 1, 0, 1, 9, 0, 5, 7],
       [9, 5, 0, 2, 9, 4, 8, 7, 9, 9]])
B = (A == 6)
B[2,0:4]
array([False, False, False, False])
B[2,1::2]
array([False, False, False, False, False])
# Summing the Boolean mask along axis=0 counts, for each column, how many entries equal 6.
C = (B.sum(axis=0))
C
array([0, 0, 0, 1, 0, 0, 0, 2, 1, 0])
# A column contains a 6 exactly when its count is positive; summing the Boolean result counts such columns.
(C > 0).sum()
3
A
array([[8, 8, 8, 6, 5, 1, 3, 6, 1, 7],
       [8, 8, 9, 5, 1, 7, 5, 6, 4, 7],
       [2, 7, 1, 9, 5, 9, 8, 8, 6, 2],
       [8, 2, 4, 1, 0, 1, 9, 0, 5, 7],
       [9, 5, 0, 2, 9, 4, 8, 7, 9, 9]])
df = pd.DataFrame(A)
df
0 1 2 3 4 5 6 7 8 9
0 8 8 8 6 5 1 3 6 1 7
1 8 8 9 5 1 7 5 6 4 7
2 2 7 1 9 5 9 8 8 6 2
3 8 2 4 1 0 1 9 0 5 7
4 9 5 0 2 9 4 8 7 9 9
def has6(c):
    """Return True if the value 6 occurs among the elements of ``c``.

    ``c`` may be any iterable (list, tuple, NumPy array, pandas Series, ...).
    It is converted to a list before the membership test because ``in`` on a
    pandas Series checks the index labels rather than the stored values,
    which would give the wrong answer when this function is used with
    ``DataFrame.apply``.
    """
    return 6 in list(c)
def has6(c):
    """Report whether 6 is one of the elements produced by iterating ``c``.

    The argument is materialised into a list first, so the test is made
    against the iterated elements (for a pandas Series, its values rather
    than its index labels).
    """
    elements = list(c)
    found = 6 in elements
    return found
has6([2,5,6,3])
True
df
0 1 2 3 4 5 6 7 8 9
0 8 8 8 6 5 1 3 6 1 7
1 8 8 9 5 1 7 5 6 4 7
2 2 7 1 9 5 9 8 8 6 2
3 8 2 4 1 0 1 9 0 5 7
4 9 5 0 2 9 4 8 7 9 9
# With axis=0, apply passes each column of df to has6 as a Series and collects one Boolean per column.
df.apply(has6,axis=0)
0    False
1    False
2    False
3     True
4    False
5    False
6    False
7     True
8     True
9    False
dtype: bool
df == 6
0 1 2 3 4 5 6 7 8 9
0 False False False True False False False True False False
1 False False False False False False False True False False
2 False False False False False False False False True False
3 False False False False False False False False False False
4 False False False False False False False False False False
# any(axis=0) reports, for each column, whether at least one entry of the Boolean frame is True.
(df == 6).any(axis=0)
0    False
1    False
2    False
3     True
4    False
5    False
6    False
7     True
8     True
9    False
dtype: bool
# any(axis=1) reports, for each row, whether at least one entry of the Boolean frame is True.
(df == 6).any(axis=1)
0     True
1     True
2     True
3    False
4    False
dtype: bool
df
0 1 2 3 4 5 6 7 8 9
0 8 8 8 6 5 1 3 6 1 7
1 8 8 9 5 1 7 5 6 4 7
2 2 7 1 9 5 9 8 8 6 2
3 8 2 4 1 0 1 9 0 5 7
4 9 5 0 2 9 4 8 7 9 9
help(df.apply)
Help on method apply in module pandas.core.frame:

apply(func: 'AggFuncType', axis: 'Axis' = 0, raw: 'bool' = False, result_type=None, args=(), **kwargs) method of pandas.core.frame.DataFrame instance
    Apply a function along an axis of the DataFrame.
    
    Objects passed to the function are Series objects whose index is
    either the DataFrame's index (``axis=0``) or the DataFrame's columns
    (``axis=1``). By default (``result_type=None``), the final return type
    is inferred from the return type of the applied function. Otherwise,
    it depends on the `result_type` argument.
    
    Parameters
    ----------
    func : function
        Function to apply to each column or row.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis along which the function is applied:
    
        * 0 or 'index': apply function to each column.
        * 1 or 'columns': apply function to each row.
    
    raw : bool, default False
        Determines if row or column is passed as a Series or ndarray object:
    
        * ``False`` : passes each row or column as a Series to the
          function.
        * ``True`` : the passed function will receive ndarray objects
          instead.
          If you are just applying a NumPy reduction function this will
          achieve much better performance.
    
    result_type : {'expand', 'reduce', 'broadcast', None}, default None
        These only act when ``axis=1`` (columns):
    
        * 'expand' : list-like results will be turned into columns.
        * 'reduce' : returns a Series if possible rather than expanding
          list-like results. This is the opposite of 'expand'.
        * 'broadcast' : results will be broadcast to the original shape
          of the DataFrame, the original index and columns will be
          retained.
    
        The default behaviour (None) depends on the return value of the
        applied function: list-like results will be returned as a Series
        of those. However if the apply function returns a Series these
        are expanded to columns.
    args : tuple
        Positional arguments to pass to `func` in addition to the
        array/series.
    **kwargs
        Additional keyword arguments to pass as keywords arguments to
        `func`.
    
    Returns
    -------
    Series or DataFrame
        Result of applying ``func`` along the given axis of the
        DataFrame.
    
    See Also
    --------
    DataFrame.applymap: For elementwise operations.
    DataFrame.aggregate: Only perform aggregating type operations.
    DataFrame.transform: Only perform transforming type operations.
    
    Notes
    -----
    Functions that mutate the passed object can produce unexpected
    behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
    for more details.
    
    Examples
    --------
    >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B'])
    >>> df
       A  B
    0  4  9
    1  4  9
    2  4  9
    
    Using a numpy universal function (in this case the same as
    ``np.sqrt(df)``):
    
    >>> df.apply(np.sqrt)
         A    B
    0  2.0  3.0
    1  2.0  3.0
    2  2.0  3.0
    
    Using a reducing function on either axis
    
    >>> df.apply(np.sum, axis=0)
    A    12
    B    27
    dtype: int64
    
    >>> df.apply(np.sum, axis=1)
    0    13
    1    13
    2    13
    dtype: int64
    
    Returning a list-like will result in a Series
    
    >>> df.apply(lambda x: [1, 2], axis=1)
    0    [1, 2]
    1    [1, 2]
    2    [1, 2]
    dtype: object
    
    Passing ``result_type='expand'`` will expand list-like results
    to columns of a Dataframe
    
    >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand')
       0  1
    0  1  2
    1  1  2
    2  1  2
    
    Returning a Series inside the function is similar to passing
    ``result_type='expand'``. The resulting column names
    will be the Series index.
    
    >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1)
       foo  bar
    0    1    2
    1    1    2
    2    1    2
    
    Passing ``result_type='broadcast'`` will ensure the same shape
    result, whether list-like or scalar is returned by the function,
    and broadcast it along the axis. The resulting column names will
    be the originals.
    
    >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')
       A  B
    0  1  2
    1  1  2
    2  1  2
df.apply(lambda row: row[2]-row[3], axis=1)
0    2
1    4
2   -8
3    3
4   -2
dtype: int64
df
0 1 2 3 4 5 6 7 8 9
0 8 8 8 6 5 1 3 6 1 7
1 8 8 9 5 1 7 5 6 4 7
2 2 7 1 9 5 9 8 8 6 2
3 8 2 4 1 0 1 9 0 5 7
4 9 5 0 2 9 4 8 7 9 9
df.max(axis=0)
0    9
1    8
2    9
3    9
4    9
5    9
6    9
7    8
8    9
9    9
dtype: int64
df.apply(max, axis=0)
0    9
1    8
2    9
3    9
4    9
5    9
6    9
7    8
8    9
9    9
dtype: int64
df.apply(lambda col: col[2]-col[3], axis=0)
0   -6
1    5
2   -3
3    8
4    5
5    8
6   -1
7    8
8    1
9   -5
dtype: int64
df.apply(len, axis=1)
0    10
1    10
2    10
3    10
4    10
dtype: int64
df.applymap(lambda x: 0 if x < 5 else 1000)
0 1 2 3 4 5 6 7 8 9
0 1000 1000 1000 1000 1000 0 0 1000 0 1000
1 1000 1000 1000 1000 0 1000 1000 1000 0 1000
2 0 1000 0 1000 1000 1000 1000 1000 1000 0
3 1000 0 0 0 0 0 1000 0 1000 1000
4 1000 1000 0 0 1000 0 1000 1000 1000 1000
df.applymap(lambda x: x+10)
0 1 2 3 4 5 6 7 8 9
0 18 18 18 16 15 11 13 16 11 17
1 18 18 19 15 11 17 15 16 14 17
2 12 17 11 19 15 19 18 18 16 12
3 18 12 14 11 10 11 19 10 15 17
4 19 15 10 12 19 14 18 17 19 19
s = pd.Series({"a":10,"b":10,"c":3})
s
a    10
b    10
c     3
dtype: int64
t = pd.Series({"a":1,"b":1,"c":2})
t
a    1
b    1
c    2
dtype: int64
s.max()
10
{"a":10,"b":10,"c":3}.max()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_83319/1825081433.py in <module>
----> 1 {"a":10,"b":10,"c":3}.max()

AttributeError: 'dict' object has no attribute 'max'
s.map(lambda x: x+3)
a    13
b    13
c     6
dtype: int64
df.map(lambda x: x+3)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_83319/3670777216.py in <module>
----> 1 df.map(lambda x: x+3)

~/opt/anaconda3/envs/book/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5485         ):
   5486             return self[name]
-> 5487         return object.__getattribute__(self, name)
   5488 
   5489     def __setattr__(self, name: str, value) -> None:

AttributeError: 'DataFrame' object has no attribute 'map'
s.applymap(lambda x: x+3)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/8j/gshrlmtn7dg4qtztj4d4t_w40000gn/T/ipykernel_83319/2831778842.py in <module>
----> 1 s.applymap(lambda x: x+3)

~/opt/anaconda3/envs/book/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5485         ):
   5486             return self[name]
-> 5487         return object.__getattribute__(self, name)
   5488 
   5489     def __setattr__(self, name: str, value) -> None:

AttributeError: 'Series' object has no attribute 'applymap'