Skip to content

Commit f7ee2e5

Browse files
committedNov 24, 2023
new openai api
1 parent 6f02b5b commit f7ee2e5

File tree

5 files changed

+414
-37
lines changed

5 files changed

+414
-37
lines changed
 

‎data/sample_apps.parquet

48.3 KB
Binary file not shown.

‎lm_test.py

-35
This file was deleted.

‎notebooks/sample_apps.ipynb

+373
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"/Users/ugoren/Code/PR/llm_workshop/data/sample_apps.parquet\n"
13+
]
14+
}
15+
],
16+
"source": [
17+
"import sys, json\n",
18+
"import pandas as pd\n",
19+
"from pathlib import Path\n",
20+
"from IPython.display import HTML, display\n",
21+
"from decouple import config\n",
22+
"sys.path.append(\"../src/\")\n",
23+
"from llm_helpers import chatgpt_ask\n",
24+
"\n",
25+
"pd.set_option('display.max_columns', 1000, 'display.width', 1000, 'display.max_rows',1000)\n",
26+
"\n",
27+
"data_dir = Path(\".\").absolute().parent/\"data\"\n",
28+
"ls = lambda p:print(\"\\n\".join(map(str,p.iterdir())))\n",
29+
"\n",
30+
"ls(data_dir)"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 2,
36+
"metadata": {},
37+
"outputs": [
38+
{
39+
"data": {
40+
"text/html": [
41+
"<div>\n",
42+
"<style scoped>\n",
43+
" .dataframe tbody tr th:only-of-type {\n",
44+
" vertical-align: middle;\n",
45+
" }\n",
46+
"\n",
47+
" .dataframe tbody tr th {\n",
48+
" vertical-align: top;\n",
49+
" }\n",
50+
"\n",
51+
" .dataframe thead th {\n",
52+
" text-align: right;\n",
53+
" }\n",
54+
"</style>\n",
55+
"<table border=\"1\" class=\"dataframe\">\n",
56+
" <thead>\n",
57+
" <tr style=\"text-align: right;\">\n",
58+
" <th></th>\n",
59+
" <th>bundle_id</th>\n",
60+
" <th>title</th>\n",
61+
" <th>description</th>\n",
62+
" <th>store_url</th>\n",
63+
" <th>category_names</th>\n",
64+
" <th>ios</th>\n",
65+
" </tr>\n",
66+
" </thead>\n",
67+
" <tbody>\n",
68+
" <tr>\n",
69+
" <th>7477</th>\n",
70+
" <td>com.alibaba.aliexpresshd</td>\n",
71+
" <td>AliExpress</td>\n",
72+
" <td>Maximum deals. Maximum fun. Shop our biggest s...</td>\n",
73+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
74+
" <td>SHOPPING,APPLICATION</td>\n",
75+
" <td>False</td>\n",
76+
" </tr>\n",
77+
" <tr>\n",
78+
" <th>49136</th>\n",
79+
" <td>com.tripledot.woodoku</td>\n",
80+
" <td>Woodoku - Block Puzzle Games</td>\n",
81+
" <td>Woodoku: a wood block puzzle game meets a sudo...</td>\n",
82+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
83+
" <td>GAME_PUZZLE,GAME</td>\n",
84+
" <td>False</td>\n",
85+
" </tr>\n",
86+
" <tr>\n",
87+
" <th>383</th>\n",
88+
" <td>air.com.buffalo_studios.newflashbingo</td>\n",
89+
" <td>Bingo Blitz™️ - Bingo Games</td>\n",
90+
" <td>Experience your free online bingo game as you ...</td>\n",
91+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
92+
" <td>GAME_BOARD,GAME</td>\n",
93+
" <td>False</td>\n",
94+
" </tr>\n",
95+
" <tr>\n",
96+
" <th>2156</th>\n",
97+
" <td>1200391796</td>\n",
98+
" <td>June's Journey: Hidden Objects</td>\n",
99+
" <td>I spy a mystery! Find the hidden objects to cr...</td>\n",
100+
" <td>https://apps.apple.com/us/app/junes-journey-hi...</td>\n",
101+
" <td>Games,Adventure,Puzzle</td>\n",
102+
" <td>True</td>\n",
103+
" </tr>\n",
104+
" <tr>\n",
105+
" <th>30750</th>\n",
106+
" <td>com.lazada.android</td>\n",
107+
" <td>Lazada - Online Shopping App!</td>\n",
108+
" <td>Lazada boast a large selection of and assortme...</td>\n",
109+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
110+
" <td>SHOPPING,APPLICATION</td>\n",
111+
" <td>False</td>\n",
112+
" </tr>\n",
113+
" <tr>\n",
114+
" <th>39423</th>\n",
115+
" <td>com.pinterest</td>\n",
116+
" <td>Pinterest</td>\n",
117+
" <td>Pinterest is the place to explore inspiration....</td>\n",
118+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
119+
" <td>LIFESTYLE,APPLICATION</td>\n",
120+
" <td>False</td>\n",
121+
" </tr>\n",
122+
" <tr>\n",
123+
" <th>35146</th>\n",
124+
" <td>com.moonactive.coinmaster</td>\n",
125+
" <td>Coin Master</td>\n",
126+
" <td>Join your Facebook friends and millions of pla...</td>\n",
127+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
128+
" <td>GAME_CASUAL,GAME</td>\n",
129+
" <td>False</td>\n",
130+
" </tr>\n",
131+
" <tr>\n",
132+
" <th>32145</th>\n",
133+
" <td>com.macys.android</td>\n",
134+
" <td>Macy's</td>\n",
135+
" <td>The latest version of the Macy’s app is better...</td>\n",
136+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
137+
" <td>SHOPPING,APPLICATION</td>\n",
138+
" <td>False</td>\n",
139+
" </tr>\n",
140+
" <tr>\n",
141+
" <th>4263</th>\n",
142+
" <td>359917414</td>\n",
143+
" <td>Solitaire</td>\n",
144+
" <td>Solitaire by MobilityWare is the ORIGINAL make...</td>\n",
145+
" <td>https://apps.apple.com/us/app/solitaire/id3599...</td>\n",
146+
" <td>Games,Casino,Card</td>\n",
147+
" <td>True</td>\n",
148+
" </tr>\n",
149+
" </tbody>\n",
150+
"</table>\n",
151+
"</div>"
152+
],
153+
"text/plain": [
154+
" bundle_id title description store_url category_names ios\n",
155+
"7477 com.alibaba.aliexpresshd AliExpress Maximum deals. Maximum fun. Shop our biggest s... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False\n",
156+
"49136 com.tripledot.woodoku Woodoku - Block Puzzle Games Woodoku: a wood block puzzle game meets a sudo... https://play.google.com/store/apps/details?id=... GAME_PUZZLE,GAME False\n",
157+
"383 air.com.buffalo_studios.newflashbingo Bingo Blitz™️ - Bingo Games Experience your free online bingo game as you ... https://play.google.com/store/apps/details?id=... GAME_BOARD,GAME False\n",
158+
"2156 1200391796 June's Journey: Hidden Objects I spy a mystery! Find the hidden objects to cr... https://apps.apple.com/us/app/junes-journey-hi... Games,Adventure,Puzzle True\n",
159+
"30750 com.lazada.android Lazada - Online Shopping App! Lazada boast a large selection of and assortme... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False\n",
160+
"39423 com.pinterest Pinterest Pinterest is the place to explore inspiration.... https://play.google.com/store/apps/details?id=... LIFESTYLE,APPLICATION False\n",
161+
"35146 com.moonactive.coinmaster Coin Master Join your Facebook friends and millions of pla... https://play.google.com/store/apps/details?id=... GAME_CASUAL,GAME False\n",
162+
"32145 com.macys.android Macy's The latest version of the Macy’s app is better... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False\n",
163+
"4263 359917414 Solitaire Solitaire by MobilityWare is the ORIGINAL make... https://apps.apple.com/us/app/solitaire/id3599... Games,Casino,Card True"
164+
]
165+
},
166+
"execution_count": 2,
167+
"metadata": {},
168+
"output_type": "execute_result"
169+
}
170+
],
171+
"source": [
172+
"df = pd.read_parquet(data_dir / \"sample_apps.parquet\").sample(9)\n",
173+
"df"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"metadata": {},
179+
"source": [
180+
"## Ask ChatGPT a question on every row"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": 3,
186+
"metadata": {},
187+
"outputs": [
188+
{
189+
"data": {
190+
"text/html": [
191+
"<div>\n",
192+
"<style scoped>\n",
193+
" .dataframe tbody tr th:only-of-type {\n",
194+
" vertical-align: middle;\n",
195+
" }\n",
196+
"\n",
197+
" .dataframe tbody tr th {\n",
198+
" vertical-align: top;\n",
199+
" }\n",
200+
"\n",
201+
" .dataframe thead th {\n",
202+
" text-align: right;\n",
203+
" }\n",
204+
"</style>\n",
205+
"<table border=\"1\" class=\"dataframe\">\n",
206+
" <thead>\n",
207+
" <tr style=\"text-align: right;\">\n",
208+
" <th></th>\n",
209+
" <th>bundle_id</th>\n",
210+
" <th>title</th>\n",
211+
" <th>description</th>\n",
212+
" <th>store_url</th>\n",
213+
" <th>category_names</th>\n",
214+
" <th>ios</th>\n",
215+
" <th>for_kids</th>\n",
216+
" </tr>\n",
217+
" </thead>\n",
218+
" <tbody>\n",
219+
" <tr>\n",
220+
" <th>7477</th>\n",
221+
" <td>com.alibaba.aliexpresshd</td>\n",
222+
" <td>AliExpress</td>\n",
223+
" <td>Maximum deals. Maximum fun. Shop our biggest s...</td>\n",
224+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
225+
" <td>SHOPPING,APPLICATION</td>\n",
226+
" <td>False</td>\n",
227+
" <td>No</td>\n",
228+
" </tr>\n",
229+
" <tr>\n",
230+
" <th>49136</th>\n",
231+
" <td>com.tripledot.woodoku</td>\n",
232+
" <td>Woodoku - Block Puzzle Games</td>\n",
233+
" <td>Woodoku: a wood block puzzle game meets a sudo...</td>\n",
234+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
235+
" <td>GAME_PUZZLE,GAME</td>\n",
236+
" <td>False</td>\n",
237+
" <td>Yes</td>\n",
238+
" </tr>\n",
239+
" <tr>\n",
240+
" <th>383</th>\n",
241+
" <td>air.com.buffalo_studios.newflashbingo</td>\n",
242+
" <td>Bingo Blitz™️ - Bingo Games</td>\n",
243+
" <td>Experience your free online bingo game as you ...</td>\n",
244+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
245+
" <td>GAME_BOARD,GAME</td>\n",
246+
" <td>False</td>\n",
247+
" <td>No.</td>\n",
248+
" </tr>\n",
249+
" <tr>\n",
250+
" <th>2156</th>\n",
251+
" <td>1200391796</td>\n",
252+
" <td>June's Journey: Hidden Objects</td>\n",
253+
" <td>I spy a mystery! Find the hidden objects to cr...</td>\n",
254+
" <td>https://apps.apple.com/us/app/junes-journey-hi...</td>\n",
255+
" <td>Games,Adventure,Puzzle</td>\n",
256+
" <td>True</td>\n",
257+
" <td>No.</td>\n",
258+
" </tr>\n",
259+
" <tr>\n",
260+
" <th>30750</th>\n",
261+
" <td>com.lazada.android</td>\n",
262+
" <td>Lazada - Online Shopping App!</td>\n",
263+
" <td>Lazada boast a large selection of and assortme...</td>\n",
264+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
265+
" <td>SHOPPING,APPLICATION</td>\n",
266+
" <td>False</td>\n",
267+
" <td>No</td>\n",
268+
" </tr>\n",
269+
" <tr>\n",
270+
" <th>39423</th>\n",
271+
" <td>com.pinterest</td>\n",
272+
" <td>Pinterest</td>\n",
273+
" <td>Pinterest is the place to explore inspiration....</td>\n",
274+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
275+
" <td>LIFESTYLE,APPLICATION</td>\n",
276+
" <td>False</td>\n",
277+
" <td>No</td>\n",
278+
" </tr>\n",
279+
" <tr>\n",
280+
" <th>35146</th>\n",
281+
" <td>com.moonactive.coinmaster</td>\n",
282+
" <td>Coin Master</td>\n",
283+
" <td>Join your Facebook friends and millions of pla...</td>\n",
284+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
285+
" <td>GAME_CASUAL,GAME</td>\n",
286+
" <td>False</td>\n",
287+
" <td>Yes.</td>\n",
288+
" </tr>\n",
289+
" <tr>\n",
290+
" <th>32145</th>\n",
291+
" <td>com.macys.android</td>\n",
292+
" <td>Macy's</td>\n",
293+
" <td>The latest version of the Macy’s app is better...</td>\n",
294+
" <td>https://play.google.com/store/apps/details?id=...</td>\n",
295+
" <td>SHOPPING,APPLICATION</td>\n",
296+
" <td>False</td>\n",
297+
" <td>No.</td>\n",
298+
" </tr>\n",
299+
" <tr>\n",
300+
" <th>4263</th>\n",
301+
" <td>359917414</td>\n",
302+
" <td>Solitaire</td>\n",
303+
" <td>Solitaire by MobilityWare is the ORIGINAL make...</td>\n",
304+
" <td>https://apps.apple.com/us/app/solitaire/id3599...</td>\n",
305+
" <td>Games,Casino,Card</td>\n",
306+
" <td>True</td>\n",
307+
" <td>No</td>\n",
308+
" </tr>\n",
309+
" </tbody>\n",
310+
"</table>\n",
311+
"</div>"
312+
],
313+
"text/plain": [
314+
" bundle_id title description store_url category_names ios for_kids\n",
315+
"7477 com.alibaba.aliexpresshd AliExpress Maximum deals. Maximum fun. Shop our biggest s... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False No\n",
316+
"49136 com.tripledot.woodoku Woodoku - Block Puzzle Games Woodoku: a wood block puzzle game meets a sudo... https://play.google.com/store/apps/details?id=... GAME_PUZZLE,GAME False Yes\n",
317+
"383 air.com.buffalo_studios.newflashbingo Bingo Blitz™️ - Bingo Games Experience your free online bingo game as you ... https://play.google.com/store/apps/details?id=... GAME_BOARD,GAME False No.\n",
318+
"2156 1200391796 June's Journey: Hidden Objects I spy a mystery! Find the hidden objects to cr... https://apps.apple.com/us/app/junes-journey-hi... Games,Adventure,Puzzle True No.\n",
319+
"30750 com.lazada.android Lazada - Online Shopping App! Lazada boast a large selection of and assortme... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False No\n",
320+
"39423 com.pinterest Pinterest Pinterest is the place to explore inspiration.... https://play.google.com/store/apps/details?id=... LIFESTYLE,APPLICATION False No\n",
321+
"35146 com.moonactive.coinmaster Coin Master Join your Facebook friends and millions of pla... https://play.google.com/store/apps/details?id=... GAME_CASUAL,GAME False Yes.\n",
322+
"32145 com.macys.android Macy's The latest version of the Macy’s app is better... https://play.google.com/store/apps/details?id=... SHOPPING,APPLICATION False No.\n",
323+
"4263 359917414 Solitaire Solitaire by MobilityWare is the ORIGINAL make... https://apps.apple.com/us/app/solitaire/id3599... Games,Casino,Card True No"
324+
]
325+
},
326+
"execution_count": 3,
327+
"metadata": {},
328+
"output_type": "execute_result"
329+
}
330+
],
331+
"source": [
332+
"df[\"for_kids\"] = df.apply(chatgpt_ask(\"Given the app description:\\n{description}\\n\\nIs it for kids?\\nAnswer only yes or no\"),axis=1)\n",
333+
"df"
334+
]
335+
},
336+
{
337+
"cell_type": "markdown",
338+
"metadata": {},
339+
"source": [
340+
"## Question:\n",
341+
"Please rewrite the code so that the response to a yes/no question is a boolean (`True` or `False`)."
342+
]
343+
},
344+
{
345+
"cell_type": "code",
346+
"execution_count": null,
347+
"metadata": {},
348+
"outputs": [],
349+
"source": []
350+
}
351+
],
352+
"metadata": {
353+
"kernelspec": {
354+
"display_name": "Python 3 (ipykernel)",
355+
"language": "python",
356+
"name": "python3"
357+
},
358+
"language_info": {
359+
"codemirror_mode": {
360+
"name": "ipython",
361+
"version": 3
362+
},
363+
"file_extension": ".py",
364+
"mimetype": "text/x-python",
365+
"name": "python",
366+
"nbconvert_exporter": "python",
367+
"pygments_lexer": "ipython3",
368+
"version": "3.11.5"
369+
}
370+
},
371+
"nbformat": 4,
372+
"nbformat_minor": 4
373+
}

‎requirements.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
langchain==0.0.135
2-
openai==0.27.8
1+
requests==2.31.0
2+
pandas==2.0.3
3+
pyarrow==12.0.1
4+
openai==1.3.5
35
python-decouple==3.8

‎src/llm_helpers.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import json
2+
import requests
3+
from openai import OpenAI
4+
from decouple import config
5+
# Module-level OpenAI client used by chatgpt_ask; built at import time with the
# value decouple's config() returns for OPENAI_API_KEY.
openai_client = OpenAI(api_key=config("OPENAI_API_KEY"))
6+
# Hugging Face API token looked up under HUGGINGFACE_API_TOKEN at import time;
# hf_ask sends it as a Bearer token in the Authorization header.
huggingfacehub_api_token = config("HUGGINGFACE_API_TOKEN")
7+
8+
def hf_ask(question: str, model_url="https://api-inference.huggingface.co/models/google/flan-t5-xxl", timeout: float = 60.0):
    """Build a row-wise function that asks a Hugging Face hosted model a question.

    The returned function is intended for ``DataFrame.apply(..., axis=1)``:
    it fills the prompt template from one row, POSTs it to ``model_url`` and
    returns the generated text.

    Args:
        question: Prompt template in ``str.format`` syntax. Every ``{name}``
            placeholder is filled from the row entry with that key.
        model_url: Inference endpoint the prompt is POSTed to.
        timeout: Seconds to wait for the endpoint before giving up on a row.
            Without a timeout a single stalled connection would block the
            whole ``apply`` indefinitely.

    Returns:
        A function ``pandas_func(row)`` that returns the ``generated_text``
        of the first element of the JSON response, or ``None`` when the
        request fails, the status code is not 200, or the payload does not
        have the expected shape.

    Raises:
        KeyError: Raised by the returned function when the template names a
            placeholder that the row does not contain.
    """
    import logging  # local import so the module's import block stays untouched

    logger = logging.getLogger(__name__)

    def pandas_func(row) -> "str | None":
        # A template/column mismatch is a caller bug, not a transient
        # failure, so formatting errors are deliberately not swallowed.
        prompt = question.format(**dict(row.items()))
        headers = {"Authorization": f"Bearer {huggingfacehub_api_token}"}

        try:
            response = requests.post(
                model_url,
                headers=headers,
                json={"inputs": prompt},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Best effort, same as chatgpt_ask: one bad row must not abort
            # the whole DataFrame.apply.
            logger.warning("hf_ask: request to %s failed: %s", model_url, exc)
            return None

        if response.status_code != 200:
            logger.warning(
                "hf_ask: %s answered with HTTP %s", model_url, response.status_code
            )
            return None

        try:
            return json.loads(response.content.decode("utf-8"))[0]["generated_text"]
        except (ValueError, LookupError, TypeError) as exc:
            # Covers invalid UTF-8 / JSON (ValueError), an empty list or a
            # missing key (LookupError) and a non-indexable payload (TypeError).
            logger.warning("hf_ask: unexpected response payload: %r", exc)
            return None

    return pandas_func
19+
20+
21+
def chatgpt_ask(question: str, model_name="gpt-3.5-turbo"):
    """Build a row-wise function that asks an OpenAI chat model a question.

    The returned function is intended for ``DataFrame.apply(..., axis=1)``:
    it fills the prompt template from one row, sends it as the user message
    of a chat completion and returns the stripped answer text.

    Args:
        question: Prompt template in ``str.format`` syntax. Every ``{name}``
            placeholder is filled from the row entry with that key.
        model_name: Name of the chat model passed to the completions API.

    Returns:
        A function ``pandas_func(row)`` that returns the content of the first
        completion choice with surrounding whitespace removed, or ``None``
        when building the prompt or calling the API fails.
    """
    import logging  # local import so the module's import block stays untouched

    logger = logging.getLogger(__name__)

    def pandas_func(row) -> "str | None":
        try:
            prompt = question.format(**dict(row.items()))
            completion = openai_client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            )
            return completion.choices[0].message.content.strip()
        except Exception as exc:
            # Best effort: one failing row must not abort the whole apply().
            # `Exception` (not a bare `except:`) lets KeyboardInterrupt and
            # SystemExit through, so a long-running apply can be interrupted.
            logger.warning("chatgpt_ask: no answer for row: %r", exc)
            return None

    return pandas_func

0 commit comments

Comments
 (0)
Please sign in to comment.