
    h=                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZ  e j                  dd      Z	d Z
dadedee   fd	Zd
 ZddededefdZy)    N)dict_row)OpenAIEMBED_MODELztext-embedding-3-smallc            
         t        j                  t        j                  dd      t        j                  dd      t        j                  dd      t        j                  dd      t        j                  d	d
      t              S )NDB_HOST	localhostDB_PORT5432DB_NAME	dynbot_dbDB_USERchatbot_userDB_PASSWORDz"tLgPX5dQJ^LgXttV3Q3PfJ7V3YJ*X9Q7JL)hostportdbnameuserpasswordrow_factory)psycopgconnectosgetenvr        */var/www/html/dynbot/referenz_retrieval.pyget_db_connr   	   s]    ??YYy+.YYy&)yyK0YYy.1=*NO r   textreturnc                     t         t        t        j                  d            a t         j                  j                  t        | xs dj                               }|j                  d   j                  S )NOPENAI_API_KEY)api_key )modelinputr   )
_openair   r   r   
embeddingscreater   stripdata	embedding)r   resps     r   embed_queryr-      sW    +;!<=$$;tzr>P>P>R$SD99Q<!!!r   c                     d}d}d}t        | |      D ]  \  }}|||z  z  }|||z  z  }|||z  z  } |dk(  s|dk(  ry|t        j                  |      t        j                  |      z  z  S )N        r   )zipmathsqrt)abdotnanbxys          r   
cosine_simr:      s}    
CCrcAq	 )1qs
B!A#IBrQqSyr)	Qw"'$))B-$))B-/00r   challenge_text	tenant_idtop_kc                 N   t        |       }d}d}d}t               5 }|j                         5 }|j                  ||f       |j	                         }	i }
|	D ]  }g }|d   r	 t        j                  |d         }|rt        ||      nd}|d   }|
j                  |      }|s|d   ||dk\  rd	nd
d}n$t        |d   |      |d<   |dk\  r|dxx   d	z  cc<   ||
|<    |
sg cddd       cddd       S g }|
j                         D ]  \  }}|j                  |||f       |j                         }d}|r*|d   r%	 t        j                  |d         }t        ||      }|j                  ||d   |d   ||d   dt        |d   |      z  d|z  z   d        |j                  d d       |dt        d	|       }g }|D ]  }|j                  |||d   |d   d       |j	                         }|D cg c]+  }|d   j                         dk7  s|d   dk(  r|d   |d   d- }}|j                  |d   |d   t!        |d         |d        |cddd       cddd       S # t        $ r g }Y w xY w# t        $ r Y !w xY wc c}w # 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)a  
    Python-side similarity:
    1) Fetch all Einordnung bullets for tenant
    2) Score with cosine_sim
    3) Roll-up by story_id (take max), bonus for multiple close hits
    4) Re-rank by full Einordnung similarity as tie-break
    5) Return top_k with all sections (HTML)
    z
    SELECT id, story_id, title, content_text, embedding_json
    FROM documents
    WHERE tenant_id = %s
      AND section ILIKE 'Einordnung'
      AND bullet_index >= 0
    z
    SELECT embedding_json
    FROM documents
    WHERE tenant_id = %s
      AND section ILIKE 'Einordnung'
      AND bullet_index = -1
      AND story_id = %s
    LIMIT 1
    a  
    SELECT section, bullet_index, content_html
    FROM documents
    WHERE tenant_id = %(tenant_id)s
    AND title     = %(title)s
    AND story_id  = %(story_id)s
    AND NOT (section ILIKE 'Einordnung' AND bullet_index >= 0)
    ORDER BY
    CASE
        WHEN section ILIKE 'Titel' THEN 0
        WHEN section ILIKE 'Subtitel' THEN 1
        WHEN section ILIKE 'Branche' THEN 2
        WHEN section ILIKE 'Beschreibung-Titel' THEN 3
        WHEN section ILIKE 'Beschreibung' THEN 4
        WHEN section ILIKE 'Beschreibung-Highlight' THEN 5
        WHEN section ILIKE 'Beschreibung-Finish' THEN 6
        WHEN section ILIKE 'Einordnung' THEN 7
        WHEN section ILIKE 'Beitrag' THEN 8
        WHEN section ILIKE 'Zitat' THEN 9
        ELSE 99
    END, section;
    embedding_jsonr/   story_idtitleg333333?   r   )rA   best
close_hitsrC   rD   Ngffffff?g333333?)r@   rA   
bullet_simfull_simrD   finalc                     | d   | d   fS )NrG   rD   r   )rs    r   <lambda>z/find_matching_referenzstories.<locals>.<lambda>   s    1W:q"? r   T)keyreverse)r<   rA   r@   section
einordnungbullet_indexcontent_html)rM   rQ   rG   )r@   rA   scoresections)r-   r   cursorexecutefetchalljsonloads	Exceptionr:   getmaxitemsfetchoneappendsortlowerfloat)r;   r<   r=   qvecFETCH_BULLETS_SQLFETCH_FULL_SQLFETCH_SECTIONS_SQLconncurbullets	per_storyrowembsimsidrecrankedfullrF   fvectopresultsrI   secss
clean_secss                             r   find_matching_referenzstoriesrw   $   s?    ~&DN. 
 H$ H%	|4,,. 	 	!CC#$**S)9%:;C ,/*T3'CCj/C--$C #GccUYk_`a!#f+s3F$;%*% IcN!	!& 5H H H: !) 	HCKKC(89<<>DH-.::d+;&<=D)$5H MMW!&k$!,/CK ::S8^K 	& 	?N^c!Um$  	AKK&"+ajaPZm\ <<>D Y<%%'<71^;LPR;R iL!N:KLJ 
 NNjM7qz*&	 	$ QH H H ! C6 ! ,yH H H H Hs   J2J I8A'J	J2AJ8$I1BJ/0J
-J	JI.	*J-I.	.J1	I>	:J=I>	>JJ	JJ$)   )r   rW   r1   r   psycopg.rowsr   openair   r   r   r   r&   strlistra   r-   r:   intrw   r   r   r   <module>r~      sm       ! bii'?@ "c "d5k "1{# {# {c {r   