<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="/rss/stylesheet/" type="text/xsl"?>
<rss xmlns:content='http://purl.org/rss/1.0/modules/content/' xmlns:taxo='http://purl.org/rss/1.0/modules/taxonomy/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:itunes='http://www.itunes.com/dtds/podcast-1.0.dtd' xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0" xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:atom='http://www.w3.org/2005/Atom' xmlns:podbridge='http://www.podbridge.com/podbridge-ad.dtd' version='2.0'>
<channel>
  <title>Johnny Lee</title>
  <language>en-us</language>
  <generator>microfeed.org</generator>
  <itunes:type>episodic</itunes:type>
  <itunes:explicit>false</itunes:explicit>
  <atom:link rel="self" href="https://johnnyclee.com/rss/" type="application/rss+xml"/>
  <link>https://johnnyclee.com</link>
  <atom:link rel="next" href="https://johnnyclee.com/rss/?next_cursor=1683385200000&amp;sort=newest_first" type="application/rss+xml"/>
  <description>
    <![CDATA[<h2>Hi! Welcome! I’m Johnny.</h2><p><br></p><p>Building <a href="https://caisey.me" rel="noopener noreferrer" target="_blank">caisey.me</a>! Talk to me about the future of agentic AI product development and deployment!</p><p><br></p><p>Previously, at <a href="http://adept.ai" rel="noopener noreferrer" target="_blank">Adept</a>, <a href="https://amazon.jobs/en/teams/lab126/" rel="noopener noreferrer" target="_blank">Amazon Lab126</a>, <a href="https://23andme.com/" rel="noopener noreferrer" target="_blank">23andMe</a>,&nbsp;and <a href="https://salesforce.com/" rel="noopener noreferrer" target="_blank">Salesforce</a>.</p><p><br></p><p>Here, I post my ramblings on everything from technology to markets.</p><p><br></p><p>Find my latest writings at <a href="https://myriadperspectives.com" rel="noopener noreferrer" target="_blank">Myriad Perspectives</a>.</p><p><br></p><p><a href="https://x.com/ByJohnnyLee" rel="noopener noreferrer" target="_blank">https://x.com/ByJohnnyLee</a></p>]]>
  </description>
  <itunes:author>Johnny Lee</itunes:author>
  <itunes:image href="https://johnnyclee.com/assets/default/channel-image.png"/>
  <image>
    <title>Johnny Lee</title>
    <url>https://johnnyclee.com/assets/default/channel-image.png</url>
    <link>https://johnnyclee.com</link>
  </image>
  <itunes:category text="Society &amp; Culture">
    <itunes:category text="Personal Journals"/>
  </itunes:category>
  <item>
    <title>Trust: why companies buy software</title>
    <guid>-zJnkqZTE9z</guid>
    <pubDate>Mon, 23 Feb 2026 04:25:44 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/trust-why-companies-by-softwar--zJnkqZTE9z/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/trust-why-companies-buy-software?r=1w6qut" type="text/html"/>
  </item>
  <item>
    <title>Something Small is Happening</title>
    <guid>Pn3tHnIIkGu</guid>
    <pubDate>Wed, 11 Feb 2026 00:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/something-small-is-happening" type="text/html"/>
  </item>
  <item>
    <title>How to even keep up with explosive AI progress?</title>
    <guid>WaulowYqAlN</guid>
    <pubDate>Mon, 19 Jan 2026 12:57:34 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/WaulowYqAlN/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/how-to-even-keep-up-with-explosive" type="text/html"/>
  </item>
  <item>
    <title>How AI Is Changing the Way Students Learn</title>
    <guid>02Sx4lS0NZI</guid>
    <pubDate>Mon, 24 Nov 2025 22:28:36 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/02Sx4lS0NZI/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-3d9f73302e474327ffceb8791b43b0e7.JPG"/>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://business.columbia.edu/insights/digital-future/ai-learning-platform" type="text/html"/>
  </item>
  <item>
    <title>OpenAI&apos;s road to become a hyperscaler</title>
    <guid>tSkynN1uDst</guid>
    <pubDate>Sun, 19 Oct 2025 22:36:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/tSkynN1uDst/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/openais-road-to-become-a-hyperscaler" type="text/html"/>
  </item>
  <item>
    <title>Power &amp; Fab Capacity: Last Jigsaw Pieces in the Dash for Compute</title>
    <guid>ZkxnZI-YDhQ</guid>
    <pubDate>Wed, 08 Oct 2025 22:36:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/ZkxnZI-YDhQ/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/power-and-fab-capacity-last-jigsaw" type="text/html"/>
  </item>
  <item>
    <title>Cut-throat AI competition, captive resources, Google&apos;s AI moat</title>
    <guid>OZUSCJm5t0o</guid>
    <pubDate>Sun, 20 Jul 2025 22:35:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/OZUSCJm5t0o/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/cut-throat-ai-competition-captive" type="text/html"/>
  </item>
  <item>
    <title>A.I. usage surging across America</title>
    <guid>6nnVUcKl_eY</guid>
    <pubDate>Tue, 24 Jun 2025 22:34:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <link>https://johnnyclee.com/i/6nnVUcKl_eY/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.com/p/ai-usage-surging-across-america-update" type="text/html"/>
  </item>
  <item>
    <title>Crossroads Ahead: US Market Confidence Belies Historic Uncertainty in Economic Growth</title>
    <guid>HEUS38C3jjv</guid>
    <pubDate>Fri, 02 May 2025 01:58:15 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<p>One month after Liberation Day, the US stock market looks more or less re-assured about the future economic outlook. For the month of April, the S&amp;P 500 is only down ~1% and the NASDAQ is up ~1%.</p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd8325124-07c9-401a-bd53-9b5e08f629f8_1600x1066.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd8325124-07c9-401a-bd53-9b5e08f629f8_1600x1066.png"></a></p><h3>Liberation Day unleashed the next stage of trade warfare</h3><p>Shortly after inauguration, the new US administration embarked on a new trade war footing. The US placed new tariffs on goods from China, Canada, and Mexico. Throughout February and March, the counterparties likewise have increased their tariffs in response in a tit-for-tat motion–with some occasional pausing and backsliding.</p><p>However on April 2nd (Liberation Day), the US announced broad tariffs on nearly every US trading partner, with an indiscriminate minimum tariff rate of 10% and higher. In more tit-for-tat motions, the US raised tariffs on China to a cumulative tariff of 145%, with China retaliating with a cumulative tariff of 125%.&nbsp;&nbsp;</p><p>The new “reciprocal” tariffs exceeded Wall Street’s worst expectations. 
US equity and fixed income markets reacted dramatically to the broad protectionist measures.&nbsp;&nbsp;</p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F465a6540-da39-40c1-8829-f5910300efd2_1282x792.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F465a6540-da39-40c1-8829-f5910300efd2_1282x792.png"></a></p><p>In about a week, the S&amp;P 500 lost ~11%. US junk bonds yield increased ~100bps. The US 10-year treasury rate increased ~50bps, the largest weekly increase since 2001. The 10-year is frequently used as the world’s risk-free rate for capital markets, as a global barometer of investment risk appetite and trust in the US financial system.</p><p>While Trump suspended the reciprocal tariffs (10%+ broad tariffs) for 90 days for all trading partners on April 9, the US-China tariffs remained. As of May 1, 2025, the US and China representatives are not in any active bilateral negotiations.</p><p><br></p><h3>2025 starts with economic contraction</h3><p>In the first contraction since the first quarter of 2022, the US economy contracted&nbsp;<a href="https://www.bea.gov/news/2025/gross-domestic-product-1st-quarter-2025-advance-estimate" rel="noopener noreferrer" target="_blank">0.3%</a>&nbsp;in the first quarter of 2025, mostly driven by imports front-running tariffs. The 2022 contraction had the backdrop of the highest inflation rates since the 1980s. In June 2022, annualized inflation reached a high of 9.1% . 
Around the same time, the Federal Reserve started tightening financial conditions to combat inflation: raising interest rates from 0% to a high of 5.5% in 2023.&nbsp;</p><p>This time around, there are more signs of structural obstacles to the growth of the US economy, relative to the ex-US world.</p><p><strong>1. The “US” brand as a symbol of free trade and safe haven has forever been damaged.</strong>&nbsp;“Liberation Day” and the fallout from the US trade policy progression continues to puzzle US and foreign investors. The lack of coherent US policy objectives creates tremendous uncertainty in global trade and development.&nbsp;</p><ol><li class="ql-indent-1">Year to date, the dollar has continued to weaken, with the US dollar index down ~7%.&nbsp;Tourism to the US sharply declined in the days after tariffs.<img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F595f4a19-fd88-4f26-a1bb-3d006c2ae48a_724x414.png"></li><li class="ql-indent-1">Ex-US equities are up ~8.4% for the year, outperforming US equities by ~13 points.<img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F860a2995-6edf-45e0-98c2-fc138394e190_1600x1066.png"></li><li class="ql-indent-1">The bond selloff in the treasury and corporate bond market was mostly foreign driven, while the recent rebound in prices was mostly inflow from US domestic investors.</li><li class="ql-indent-1">The lack of policy coherence creates a large disincentive for 1-5 year capital investments in the US, especially as it relates to reshoring manufacturing and infrastructure capacity.&nbsp;</li><li class="ql-indent-1">For example, Apple, the symbol for US consumer brands, has no plans to move manufacturing to the US, but instead will shift more assembly volume to&nbsp;<a 
href="https://www.ft.com/content/c2be45b8-cfad-4cbb-9a1a-bfd0626be372" rel="noopener noreferrer" target="_blank">India</a>, mostly as a means to side-step the US tariffs.&nbsp;</li></ol><p><strong>2. The supply chain shock from a man-made shock to the global trade system has yet to arrive.</strong></p><ol><li class="ql-indent-1">As much as the US administration may want to decouple global trade, it cannot be done overnight.&nbsp;</li><li class="ql-indent-1">Similar to the COVID dual-sided (supply + demand) shock, the supply chain is undergoing a similar shock. Sea-bound containers from China to US have fallen 45% year-over-year in April 2025.&nbsp;<img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F451f4453-9594-4328-8749-f4dd22220d86_1424x1022.png"></li><li class="ql-indent-1">While US merchants front-loaded inventory, they are simply delaying the inevitable: a combination of (1) a shortage of goods and/or (2) a massive drop in demand due to increased prices due to tariffs.&nbsp;</li><li class="ql-indent-1">This type of supply shock will impact small businesses the most, those who do not have the working capital capacity to soften the supply chain shock with increased inventory. Small businesses employ 80% of America and are responsible for&nbsp;<a href="https://www.apolloacademy.com/how-are-imports-from-china-used-in-the-us/#:~:text=%20In%20addition%2C%20small%20and%20medium%2Dsized%20enterprises%20account%20for%2041%%20of%20imports%20from%20China." rel="noopener noreferrer" target="_blank">~40%</a>&nbsp;of US imports from China. 
When small businesses suffer, America suffers.</li><li class="ql-indent-1">Even assuming the US and China come to some type of equilibrium that is able to restart trade–current conditions are tantamount to a trade embargo for most goods, the infamous&nbsp;<a href="https://www.investopedia.com/bullwhip-effect-definition-5499228" rel="noopener noreferrer" target="_blank">bullwhip</a>&nbsp;effect will come back into play.&nbsp;</li><li class="ql-indent-1">The US inventory to sales&nbsp;<a href="https://fred.stlouisfed.org/series/ISRATIO" rel="noopener noreferrer" target="_blank">ratio</a>&nbsp;will give us a good indicator of how severe the shocks will be this time around, with March 2025 figures releasing May 15.</li></ol><p><strong>3. Shift in global security and perspectives is already becoming evident.</strong>&nbsp;Geopolitically, the US absconding from the global alliances and partnerships will have longer term consequences beyond one US presidential term.&nbsp;</p><ol><li class="ql-indent-1"><strong>Canada:&nbsp;</strong>Most obviously, Trump’s policies have literally reversed the Canadian election. Enabling Mark Carney’s Liberal party to erase the Conservative party’s ~10-15 point lead over a span of 40 days. The Conservative party leader even&nbsp;<a href="https://www.cbc.ca/news/politics/poilievre-uncertain-future-1.7521681" rel="noopener noreferrer" target="_blank">lost</a>&nbsp;his own seat.<img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcafce144-ad98-444c-bee7-cd8110a4361c_1508x1186.png"></li><li class="ql-indent-1"><strong>European security: Germany is gearing up for arms.&nbsp;</strong>Its defense industry base is being re-ignited after Germany&nbsp;<a href="https://www.bbc.com/news/articles/c62z6gljv2yo" rel="noopener noreferrer" target="_blank">approved</a>&nbsp;a new 500 billion euro infrastructure fund. 
Shepherded by Merz–who was elected in Feb 2025 after Trump started the trade war and embarrassed Ukraine at the White House, the new law exempts defense and security spending from Germany’s strict debt guidelines–enabling the state to issue more Bunds to finance its security objectives.&nbsp;German defense companies (i.e. Rheinmetall) have more than doubled in value since then.&nbsp;<img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F28a4e8e6-d961-4d70-b979-dfaa8a272eca_600x617.png"></li><li class="ql-indent-1"><strong>Indo-Pacific security:&nbsp;</strong>Australia and the Philippines will hold general elections in May 2025, on the 3rd and 12th respectively. Both countries play a crucial role in Indo-Pacific security, as US allies and a regional balance of power against China. The Australian elections have already been an intense balance between its Chinese and US ties in recent weeks.&nbsp;</li></ol><p><strong>4. 
Pending new tax and budget legislation, US debt ceiling and fiscal outlook remain as uncertain as trade policy.</strong></p><ol><li class="ql-indent-1">The White House places a July 4th&nbsp;<a href="https://www.politico.com/live-updates/2025/04/28/congress/thune-reconciliation-timeline-debt-limit-00313666" rel="noopener noreferrer" target="_blank">deadline</a>&nbsp;to finish passing Trump’s tax agenda.</li><li class="ql-indent-1">The X date (when the US treasury will reach the debt ceiling) is likely to be sometime&nbsp;<a href="https://www.politico.com/live-updates/2025/03/26/congress/us-likely-to-reach-debt-limit-x-date-in-august-or-september-congress-scorekeeper-predicts-00250525" rel="noopener noreferrer" target="_blank">this summer or early Fall</a>.</li><li class="ql-indent-1">With uncertain economic outcomes over the short and medium term, drastic changes in lowering tax receipts relative to debt burden will place additional stress on Treasury yields.&nbsp;</li></ol><h3><br></h3><h3>The crossroads ahead</h3><p>The most important major crossroad will be on the path to US-China trade negotiations in earnest.&nbsp;</p><p>While the two parties have yet to talk, there are increasing unilateral signals that both sides are willing to sit down.&nbsp;</p><p>On May 2, China (via its state owned media CCTV) has&nbsp;<a href="https://sinocism.com/i/162540167/us-china-trade-war" rel="noopener noreferrer" target="_blank">signaled</a>&nbsp;some willingness to approach the U.S. under the right circumstances. 
But China’s other arms of state continue to&nbsp;<a href="https://www.ft.com/content/b03e5212-c3f1-40fa-aaa0-ee8b0a47b981" rel="noopener noreferrer" target="_blank">message</a>&nbsp;a strong stance of no concessions with the US.</p><p>The backdrop for these prospective talks is not good: (1) uncertain economic growth, (2) declining consumer confidence, (3) prospects of skyrocketing prices, and (4) an uncertain US fiscal and tax outlook.</p><p><br></p><h3><strong>Events to watch</strong></h3><ul><li>May 3: Australian Parliament elections</li><li>May 7: Federal Reserve interest rate decision</li><li>May 12: Philippines midterm elections</li><li>May 15: U.S. March 2025 inventory-to-sales survey data</li><li>May 28: Federal Reserve May meeting minutes release</li></ul>]]>
    </description>
    <link>https://johnnyclee.com/i/HEUS38C3jjv/</link>
    <itunes:episodeType>full</itunes:episodeType>
    <enclosure url="https://myriadperspectives.substack.com/p/crossroads-ahead-us-market-confidence" type="text/html"/>
  </item>
  <item>
    <title>Optimizely for Intelligence</title>
    <guid>MEQjg0RYpnX</guid>
    <pubDate>Tue, 11 Mar 2025 20:14:10 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h3>The Road Towards Intelligence</h3><p>The AI community continues to drive model improvements by pulling 2 key levers:</p><p><br></p><h4>1. Compute</h4><p><br></p><ul><li><strong>Algorithmic Improvements</strong>: Innovations such as new forms of reinforcement learning, and attention mechanisms contribute to increasing efficiency. Improvements also focus on achieving the same performance with less compute intensity.</li><li><strong>Access to More Computing Power</strong>: The industry–for the most part–relies on NVIDIA chips, large-scale data centers, and capital investment to expand computational resources and supply.</li></ul><p><br></p><h4>2. Data</h4><p><br></p><ul><li>AI models learn from structured and unstructured data sources.&nbsp;</li><li>High quality data with previously unincluded knowledge is non-negotiable in improving intelligence.</li><li>Companies continuously invest in refining/curating datasets, via humans or machines.</li></ul><p><br></p><h3>Country of Geniuses vs. Country of Yes-Men</h3><p>Dario Amodei, CEO of Anthropic, envisions a future where data centers house a "country of geniuses," a concept he explores in his essay <a href="https://darioamodei.com/machines-of-loving-grace" rel="noopener noreferrer" target="_blank"><em>Machines of Loving Grace</em></a>. He argues that intelligence will continue to advance, though with physical constraints in sectors like biology, where real-world rate limitations affect progress.</p><p><br></p><p>Conversely, Thomas Wolf of Hugging Face (unsurprisingly, the open source foil to the close-sourced AI CEO) presents a counterpoint, arguing that we won’t experience a “compressed 21st century” of rapid innovation. He argues that AI models could function more like a "<a href="https://thomwolf.io/blog/scientific-ai.html" rel="noopener noreferrer" target="_blank">country of yes-men</a>" rather than a "country of geniuses." 
While models will be a valuable source of knowledge, he believes it’s more akin to a very obedient and A+ student than it is a genius for scientific discoveries.</p><p><br></p><p>Personally, I’m more inclined to believe in Thomas’s argument: AI models primarily predict the most probable token during pre-training. While post-training techniques like reinforcement learning introduce more nuanced rewards for exploration (a la search), the core predictive mechanism may limit the model’s ability to challenge existing knowledge or pursue unconventional reasoning–especially if it is deemed improbable by historic data.&nbsp;</p><p><br></p><p>Regardless, I do believe Tyler Cowen’s <a href="https://marginalrevolution.com/marginalrevolution/2025/02/why-i-think-ai-take-off-is-relatively-slow.html" rel="noopener noreferrer" target="_blank">viewpoint</a>. Humans and our “sticky” modes of societal interaction will be a rate limiting factor in how quickly we can leverage the new sources of intelligence.</p><p><br></p><h3>AI usage today</h3><p><br></p><p>Right now, there are a few ways most people access large language models (LLMs) and AI:</p><p><br></p><h4><strong>Chatbot Products</strong></h4><p>People simply use the labs’ chatbot products like ChatGPT, Claude, or Gemini, and interact with them in a chat interface.&nbsp;</p><p><br></p><ul><li>The chatbot application might select some default model settings or have access to certain tools to make the conversation more insightful.&nbsp;</li><li>If you’re more knowledgeable, you might pick a different model—like gpt-4o vs. 
o1, o3, or some other variation—but for a typical user, those model names and versions don’t mean much.</li></ul><h4><strong>LLMs Embedded in Existing Applications</strong></h4><p>A straightforward example is in software engineering, where developers use an AI-augmented IDE like Cursor.&nbsp;</p><p><br></p><ul><li>The model’s text understanding and knowledge base helps autocomplete and suggest code.&nbsp;</li><li>If you define a new function or module and your codebase is well documented—or is otherwise known to the model—it can help you quickly generate boilerplate or complete new code.&nbsp;</li><li>You then run and test it to ensure correctness. Essentially, it’s augmenting existing software (an IDE) with capabilities that can enhance developer productivity.</li></ul><h4><strong>APIs in the Background</strong></h4><p>This is where LLMs are accessed through APIs to add conversational or classification features, object identification, editing suggestions, and so on.&nbsp;</p><p><br></p><ul><li>For example, if you contact customer support via a chat, there’s likely a large language model in the background analyzing or drafting replies.&nbsp;</li><li>A human agent might still finalize the response—or it may be fully automated—depending on the setup.&nbsp;</li><li>In either case, the AI is an API-powered component within a larger process.</li></ul><h3><br></h3><h3>Adapting to the New Age of Intelligence &amp; Cost-Benefit Analysis</h3><p>Users need to adapt, change, or add new modes of interacting with computers. I think that will be critical for how we bring this technology into the world. 
Intelligence that creates leverage and growth.</p><p><br></p><p>It may be slightly unhelpful to think we can simply apply deterministic frameworks from the last couple of decades and hope they apply similarly here.&nbsp;</p><p><br></p><p>For instance, there’s a lot of hype and optimism around the word “agents.” The idea is that you can delegate a certain task to an AI system.&nbsp;</p><p><br></p><p>Think about the agent loop:</p><p><br></p><ol><li><strong>User Input</strong>: A user provides a task, specification, or request to an AI system.</li><li><strong>Inference + Tools</strong>: The AI system—powered by one or multiple large language models—interprets that request and makes inferences. Part of the inference involves accessing various tools, which might be deterministic ones like a calculator, a code interpreter, a web browser, or even other models (e.g., an image generator if you need an image).</li><li><strong>Execution &amp; Feedback</strong>: The system predicts what it should do next using these tools, retrieves the result, and repeats this loop until it believes the request has been completed.</li><li><strong>Response</strong>: Finally, it returns the response to the user.</li></ol><p><br></p><p>In a classic API setting, you have a request and a defined contract with an expected response type. You typically get back a success status or some outcome. Here, we’re trying to fit that framework onto a probabilistic system by slowly injecting and abstracting away the probabilistic compute under the hood.</p><p><br></p><p>That’s a good way to start adopting these tools, but the challenge remains that these systems will make mistakes.&nbsp;</p><p><br></p><p>Some tasks can tolerate that, and others cannot. 
In cases where error rates are relatively low and the cost of an error isn’t too high, the cost-benefit equation may still favor deploying these agents.&nbsp;</p><p><br></p><p>This often applies to areas like customer service or fraud detection, where the cost of a false positive might not be extremely high, but manually reviewing each case would be very expensive.&nbsp;</p><p><br></p><p>In effect, you’re balancing the cost of mistakes against the total volume of tasks.</p><p><br></p><h3>Optimizely for Intelligence</h3><p>“Optimizely for Intelligence,” in my mind, is another way of saying we should help the average user navigate the world of AI in a way that feels more deterministic and controlled.&nbsp;</p><p><br></p><p>Optimizely gave creative marketers a deterministic method of navigating a probabilistic decision making process of choosing the right content and creative for their audiences.</p><p><br></p><p>In a world where we have variable-cost intelligence—easily supplied, nearly a commodity—and yet so many different “SKUs” or choices, the current market paradox is that people are not actually leveraging any choice.&nbsp;</p><p><br></p><p>They’re just sticking to a single model or setup.&nbsp;</p><p><br></p><p>So the question is: How can we inject that choice in a way that empowers the user or consumer to pick what’s best for them?</p><p><br></p><p>Maybe this doesn’t have to be explicit, it may be abstracted away with signals from the user’s experience.&nbsp;</p><p><br></p><p>The challenge remains, also challenging for humans:</p><p><br></p><p><strong>Choosing the right mode of intelligence at the right time for the right task.&nbsp;</strong></p><p><br></p>]]>
    </description>
    <link>https://johnnyclee.com/i/MEQjg0RYpnX/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>GPT4.5: Good or not, its launch is good for the LLM serving business</title>
    <guid>wLce3ACG414</guid>
    <pubDate>Tue, 04 Mar 2025 20:58:10 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<p>GPT-4.5&nbsp;<a href="https://openai.com/index/introducing-gpt-4-5/" rel="noopener noreferrer" target="_blank">launched</a>&nbsp;as a preview last Thursday. GPT-4.5 topped benchmarks like&nbsp;<a href="https://x.com/lmarena_ai/status/1896590146465579105" rel="noopener noreferrer" target="_blank">LMArena</a>, only to be&nbsp;<a href="https://x.com/lmarena_ai/status/1896675400916566357" rel="noopener noreferrer" target="_blank">matched</a>&nbsp;by Grok-3 shortly after.</p><p><br></p><p>Regardless of the reception on quality, OpenAI’s decision to roll out their largest model to the market is a clever business maneuver.&nbsp;</p><p><br></p><p>OpenAI is the undisputed market leader in the LLM serving business. The company projected&nbsp;<a href="https://www.nytimes.com/2024/09/27/technology/openai-chatgpt-investors-funding.html#:~:text=the%20company%20expects%20about%20$3.7%20billion%20in%20annual%20sales%20this%20year" rel="noopener noreferrer" target="_blank">$3.7B</a>&nbsp;in revenues in 2024. 
Its closest competitor–Anthropic–was projected to have closed 2024 with&nbsp;<a href="https://www.bloomberg.com/news/articles/2025-03-03/anthropic-finalizes-megaround-at-61-5-billion-valuation#:~:text=Late%20last%20year%2C%20the%20company%E2%80%99s%20annual%20revenue%20run%20rate%20was%20about%20$1%20billion%2C%20according%20to%20a%20person%20familiar%20with%20the%20matter" rel="noopener noreferrer" target="_blank">$1B</a>&nbsp;in revenues.</p><p><br></p><p>GPT 4.5 is the most&nbsp;<a href="https://openai.com/api/pricing/" rel="noopener noreferrer" target="_blank">expensive</a>&nbsp;API model on the market today, and also OpenAI’s most expensive model to-date.</p><ul><li>Prompt tokens are $75 per 1M tokens&nbsp;</li><li>Completion tokens are $150 per 1M tokens</li></ul><p><br></p><p>Previously, OpenAI’s short-lived&nbsp;<a href="https://openai.com/index/gpt-4-research/#:~:text=of%20text)%20version%2C-,gpt%2D4%2D32k,-%2C%20which%20will%20also" rel="noopener noreferrer" target="_blank">GPT 4 (32k context)</a>&nbsp;(launched March 2023) was the most expensive model:</p><ul><li>Prompt tokens were $60 per 1M tokens&nbsp;</li><li>Completion tokens were $120 per 1M tokens&nbsp;</li><li>GPT 4’s 8k context variant is 50% cheaper, at $30/$60.</li></ul><p><br></p><h3><strong>Price dynamics in a competitive market</strong></h3><h3><br></h3><p>In competitive markets, pricing decisions have massive impacts on the market’s overall profit potential.&nbsp;</p><p><br></p><p>This is especially true in markets with oligopoly characteristics. 
When a select few suppliers control the market, one firm’s decisions can dramatically change the profits of the entire market.</p><p><br></p><p>In many ways, these markets present a classic&nbsp;<a href="https://socialsci.libretexts.org/Bookshelves/Economics/The_Economics_of_Food_and_Agricultural_Markets_(Barkley)/05%3A__Monopolistic_Competition_and_Oligopoly/5.04%3A_Oligopoly%2C_Collusion%2C_and_Game_Theory#:~:text=Prisoner%E2%80%99s%20Dilemmas%20are%20very%20common%20in%20oligopoly%20markets:%20" rel="noopener noreferrer" target="_blank">prisoner’s dilemma</a>, especially if the products are fairly interchangeable.&nbsp;</p><p><br></p><p>Take the classic gas station example.&nbsp;</p><ol><li>There is one small town with only 3 gas stations.</li><li>They likely have similar cost-structures. Wholesale gasoline costs the same amount.&nbsp;</li><li>Given a small town, the demand is relatively predictable as price fluctuates.&nbsp;</li></ol><p><br></p><p>The best profit outcome (for the gas stations) is if all 3 firms cooperate (collude) to keep prices high. In a prisoner’s dilemma, this is the “collusion” outcome: no one goes to prison if no one cooperates with the police.&nbsp;</p><p><br></p><ul><li>They can reach what is known as a high monopoly profit position (not possible with a competitive market at equilibrium).&nbsp;</li><li>This profit can be generated by raising prices to the level where demand and price maximize profits.&nbsp;</li><li>The town will still be willing to buy gasoline at some high price, but at some lower volume. 
The higher prices will more than compensate for the lower volume.</li></ul><p><br></p><p>The worst profit outcome (for the gas stations) is if 1 gas station lowers prices to generate more demand for their own gas station.&nbsp;</p><p><br></p><ul><li>Then, other gas stations will lower prices to stay competitive, which is a race to the bottom.&nbsp;</li><li>The small town now has cheaper gas, but the gas stations likely make much less profits.</li></ul><p><br></p><h3><strong>Outright price collusion is illegal, but signaling and tacit collusion is not</strong></h3><h3><br></h3><p>For the players in consumer technology, price fixing is not&nbsp;<a href="http://www.wsj.com/articles/supreme-court-turns-away-apple-appeal-in-e-books-antitrust-case-1457362484?reflink=desktopwebshare_permalink" rel="noopener noreferrer" target="_blank">new</a>. Because of its attractiveness, examples are&nbsp;<a href="https://en.wikipedia.org/wiki/List_of_price_fixing_cases" rel="noopener noreferrer" target="_blank">plentiful</a>.</p><p><br></p><p>Yet, there are legal ways to accomplish cooperation, and also proven to work via game theory. Here are some examples:</p><p><br></p><ul><li><strong>Price leadership:</strong>&nbsp;In a small market, firms implicitly and simply follow the leader. The leader will signal price levels first. For example, Apple often sets the market leading levels for high quality smart phones around the world, and others follow the price levels.&nbsp;</li><li><strong>Price matching:</strong>&nbsp;Retailers will put in-place “price matching” programs. Signaling to others that, as long as others set a high price, they will follow.&nbsp;</li><li><strong>Advanced notice of new supply:</strong>&nbsp;For slow moving markets like aircraft manufacturers (i.e. Boeing vs. Airbus), they will publicly announce order books of not-yet developed aircraft types. 
Such that, it does not lead to a supply glut that tanks the market.</li></ul><p><br></p><h3><strong>GPT4.5 signals price level for leading frontier models, its closest competitor is signaling too</strong></h3><h3><br></h3><p>As the most expensive API model on the market, OpenAI is definitively setting the price for this class of leading edge models. When the market leader moves, others in the market will take note.</p><p><br></p><p>Its second place competitor (Anthropic) has not yet released a similar class model.&nbsp;</p><p><br></p><p>Though, Dario Amodei, Anthropic’s CEO, has doubled down on a new&nbsp;<a href="https://www.anthropic.com/news/uk-ai-safety-summit#:~:text=to%20encourage%20a%20%E2%80%9C-,race%20to%20the%20top,-''%20in%20RSP%2Dstyle" rel="noopener noreferrer" target="_blank">phrase</a>: “<a href="https://lexfridman.com/dario-amodei-transcript/#:~:text=And%20we%20have%20a%20theory%20of%20change%20called%20Race%20to%20the%20Top" rel="noopener noreferrer" target="_blank">race</a>&nbsp;to the&nbsp;<a href="https://www.dwarkesh.com//p/dario-amodei" rel="noopener noreferrer" target="_blank">top</a>”.</p><p><br></p><p>Now, that may be in the context of model safety and responsibility.&nbsp;</p><p><br></p><p>But, if OpenAI reads the tea leaves, it may understand there is potential willingness to cooperate tacitly.&nbsp;</p><p><br></p><p>For these firms, their current gross&nbsp;<a href="https://johnnyclee.com/i/are-frontier-labs-making-80percent-gross-margins-on-llm-tTD9fwOZmuc/" rel="noopener noreferrer" target="_blank">margins</a>&nbsp;are too good to leave unprotected.</p>]]>
    </description>
    <link>https://johnnyclee.com/i/wLce3ACG414/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-235bb36d1ece9d8072b6ca9cec1a8b1c.jpeg"/>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Are frontier labs making 80+% gross margins on LLM inference?</title>
    <guid>tTD9fwOZmuc</guid>
    <pubDate>Mon, 03 Mar 2025 04:01:37 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h2>Rare insights into gross margin profile for an LLM model provider</h2><p><br></p><p>Recently, as part of their open source week, DeepSeek <a href="https://github.com/deepseek-ai/open-infra-index/blob/3122a25220273cb0806f591e52c4171bd869d8ad/202502OpenSourceWeek/day_6_one_more_thing_deepseekV3R1_inference_system_overview.md" rel="noopener noreferrer" target="_blank">disclosed</a> their online inference system design and performance statistics. This system serves all of DeepSeek’s first-party model services (API and chat services).</p><p><br></p><p>Over a single day, DeepSeek utilized an average of 1,814 H800 GPUs (up to 2,224 GPUs at peak loads) to serve all of its inference workloads. H800 GPUs are US export-eligible variants of NVIDIA’s H100 GPUs.</p><p><br></p><p>At the moment, H100 GPUs are likely around ~$<a href="https://www.together.ai/pricing#gpu-clusters:~:text=Starting%20at%20%241.75/hr" rel="noopener noreferrer" target="_blank">1.75</a> to ~$2.50 (per GPU per hr) on a long-term reserved basis. Using the same $2.00/H800 gpu/hr assumption as DeepSeek, their daily GPU inference costs are $87,072 for the entire inference cluster.</p><p><br></p><p>Currently, only usage of DeepSeek’s API services is monetized. 
Their chat services (via the web and mobile app) continue to be free.&nbsp;</p><p><br></p><p>Using the current DeepSeek R1 API pricing, the company said it could theoretically generate about ~$562k in daily revenues, representing ~84.5% in gross margins.</p><h3><br></h3><h2><strong>Extrapolating from DeepSeek, how much margins could other providers be making?</strong></h2><p><br></p><p>Mostly as a thought experiment, if other labs were able to incur some multiple N of DeepSeek’s inference cost structure, what would be their gross margins?</p><p><br></p><p>With DeepSeek R1’s very low pricing, they are already able to produce ~85% in gross margins.&nbsp;</p><p><br></p><p>If OpenAI served gpt-4o and Anthropic served claude-3.7-sonnet at the 1x DeepSeek’s cost structure, they would be making ~96.9% and ~97.5% gross margins!</p><p><br></p><p>Now, that may not yet be realistic as DeepSeek’s innovations do not transfer instantaneously to other labs. It will take some time for these labs to absorb the same cost improvements into their sprawling model training pipeline, and then into their inference systems.</p><p><br></p><p>But if we take a less favorable view, and assume OpenAI and Anthropic’s gpt-4o and claude-3.7-sonnet cost structures are 5x less efficient than DeepSeek.&nbsp;</p><p><br></p><p>Astonishingly, the less favorable gross margins are still ~84.3% and 87.3% respectively!</p><p><br></p><p><img src="https://cdn.johnnyclee.com/main-johnnyclee-com/production/media/rich-editor/items/tTD9fwOZmuc/image-c963275440f24735ad8cdb3eb451758e.png"></p><p><br></p><h2><strong>What kind of trade-offs did DeepSeek make?</strong></h2><p><br></p><p>DeepSeek’s V3 and R1 models created a lot of <a href="https://techcrunch.com/2025/01/27/nvidia-drops-600bn-off-its-market-cap-amid-the-rise-of-deepseek/" rel="noopener noreferrer" target="_blank">shock and awe</a> early in the year.&nbsp;</p><p><br></p><p>Their transformer architecture <a 
href="https://epoch.ai/gradient-updates/how-has-deepseek-improved-the-transformer-architecture" rel="noopener noreferrer" target="_blank">improvements</a> likely contributed significantly to their ability to serve V3 and R1 so cheaply with far fewer GPUs. Their MLA (multi-head latent attention) innovation significantly reduced the Key-Value cache requirements, allowing them to parallelize the attention computations across much larger batch sizes (without running out of memory).&nbsp;</p><p><br></p><p>High batch sizes lead to higher throughput, at the cost of latency. Higher throughput means higher utilization of GPUs, and thus lower costs. You can read much more in-depth explanations on inference system trade-offs from <a href="https://jax-ml.github.io/scaling-book/inference/" rel="noopener noreferrer" target="_blank">Google</a>.</p><p><br></p><p>Because their mixture-of-experts (MoE) has a very high sparsity factor (256 experts; 8 activated), they also need to utilize larger scale cross-node expert parallelism to achieve optimal load balancing for the high batch size.&nbsp;</p><p><br></p><p>As a result, DeepSeek’s inference is much slower than other providers. DeepSeek is as slow as OpenAI’s recently released gpt-4.5; which is accepted to be a much larger model (larger models are slower to serve, and thus more costly).</p><p><br></p><p><br></p><p><img src="https://cdn.johnnyclee.com/main-johnnyclee-com/production/media/rich-editor/items/tTD9fwOZmuc/image-90242ff13487e317d868c895fded1e41.png"></p>]]>
    </description>
    <link>https://johnnyclee.com/i/are-frontier-labs-making-80percent-gross-margins-on-llm-tTD9fwOZmuc/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-8b24f74d3de6a95e4d219b3875b64558.png"/>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>DeepSeek: Compounding progress… delayed market reactions</title>
    <guid>q6bGDu4MqPr</guid>
    <pubDate>Tue, 28 Jan 2025 01:54:35 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<p>It may surprise some people, but many of the improvements that <a href="https://deepseek.com/" rel="noopener noreferrer" target="_blank">DeepSeek</a> (<a href="https://www.high-flyer.cn/en/blog/" rel="noopener noreferrer" target="_blank">High Flyer</a>’s AI lab) incorporated into DeepSeek V3 were released on May 7, 2024 as part of their <a href="https://arxiv.org/abs/2405.04434" rel="noopener noreferrer" target="_blank">DeepSeek V2</a> –almost 2 full months before Meta released their groundbreaking Llama 3 <a href="https://arxiv.org/abs/2407.21783" rel="noopener noreferrer" target="_blank">paper</a> in July 2024.&nbsp;</p><p><br></p><p>Almost 9 months later, suddenly, public markets <a href="https://www.wsj.com/tech/chip-stocks-tumble-after-chinas-deepseek-ai-models-raise-doubts-over-u-s-tech-dominance-9799591b" rel="noopener noreferrer" target="_blank">decided</a> that NVIDIA should be worth ~$500B less. Okay…, there was a catalyst, DeepSeek released <a href="https://arxiv.org/abs/2501.12948" rel="noopener noreferrer" target="_blank">R1</a>, their o1/3 comparable model the week before.</p><p><br></p><h1><strong>Not an overnight affair: Random walk with upward drift</strong></h1><p><br></p><p>Yet, the progress was gradual; an o1/3 class model in the public was an eventuality in 2025 [1].</p><p><br></p><p>Modern ML is mostly based on intuition, gut, and grunt work–betting on big ideas and empirically validating the results. One idea, built on top of the last useful one, without a clear line of sight on the next research breakthrough.</p><p><br></p><p>Most AI researchers are still compute/GPU constrained (or some people like to say the GPU rich vs. the GPU poor; maybe not the best turn of phrase). There is often a mile-long laundry list of experiments they wish they can implement/run on any given day. But not enough compute.&nbsp;</p><p><br></p><p>With some luck and sheer grunt work, researchers often stumble on new clever ideas that work. 
Folks have learned a long time ago (<a href="https://www.cs.utexas.edu/~eunsol/courses/data/bitter_lesson.pdf" rel="noopener noreferrer" target="_blank">The Bitter Lesson</a>) that letting machines learn is often the best way. By sheer coincidence, another team of researchers (from the Hong Kong University of Science and Technology) converged on the same R1 reinforcement learning findings as DeepSeek, <a href="https://github.com/hkust-nlp/simpleRL-reason" rel="noopener noreferrer" target="_blank">published</a> only a few days apart.</p><p><br></p><p>DeepSeek’s journey training LLMs did not start in 2024, it started long before as a side project at High Flyer [2] from around 2019 [3].&nbsp;</p><p><br></p><h1><strong>From DeepSeek V1 to R1: Leveraging other open source research</strong></h1><p><br></p><p>DeepSeek’s progress emerged from a sea of impressive and accelerating research. Mistral and META’s open source stance only accelerated their progress. You can clearly see DeepSeek took much architectural and scaling guidance from the other labs (size of training runs in tokens trained, hyper parameters, etc..).&nbsp;</p><p><br></p><h2><u>Timeline of highlighted milestones related to DeepSeek LLM development</u></h2><p><br></p><ul><li><strong>JUNE 27, 2023:</strong> High Flyer announces proprietary internal <a href="https://www.high-flyer.cn/en/blog/hai-llm/" rel="noopener noreferrer" target="_blank">HAI-LLM</a> training framework</li><li><strong>JULY 18, 2023:</strong> <a href="https://arxiv.org/abs/2307.09288" rel="noopener noreferrer" target="_blank">Llama 2</a> released - 2T token runs - 7B to 70B models</li><li><strong>AUGUST 9, 2023:</strong> <a href="https://on.ft.com/40BWNea" rel="noopener noreferrer" target="_blank">Reports</a> of Chinese cloud providers stockpiling GPUs</li><li><strong>SEPTEMBER 23, 2023: </strong><a href="https://mistral.ai/news/announcing-mistral-7b/" rel="noopener noreferrer" target="_blank">Mistral 7B</a> - rumored <a 
href="https://x.com/ManuelFaysse/status/1706949891358859624" rel="noopener noreferrer" target="_blank">8T</a> tokens - beats Llama 2 7B</li><li><strong>OCTOBER 23, 2023:</strong> Biden administration phase 1 GPU restrictions effective</li><li><strong>DECEMBER 11, 2023:</strong> <a href="https://mistral.ai/news/mixtral-of-experts/" rel="noopener noreferrer" target="_blank">Mixtral of Experts</a> - open source MoE 8x7B (12.9B active)</li></ul><p><br></p><p>Likely compounded/leveraged on top of their Mistral 7B runs/checkpoints</p><p><br></p><ul><li><strong>JANUARY 5, 2024:</strong> <a href="https://arxiv.org/abs/2401.02954" rel="noopener noreferrer" target="_blank">DeepSeek V1</a> - 2T token runs - 7B &amp; 67B models</li></ul><p><br></p><p>Very similar to Llama 2 runs, uses GQA but deeper instead of wider models.</p><p><br></p><ul><li><strong>MARCH 8, 2024:</strong> <a href="https://arxiv.org/abs/2403.05530" rel="noopener noreferrer" target="_blank">Gemini 1.5</a> - closed-source acknowledgement of MoE in frontier model</li><li><strong>MAY 7, 2024:</strong> <a href="https://arxiv.org/abs/2405.04434" rel="noopener noreferrer" target="_blank">DeepSeek V2</a> - 8T run - 236B MoE (21B active) close to Llama 2 70B performance</li></ul><p><br></p><p>DeepSeek starts varying from the pack substantially…</p><ol><li><strong>New form of attention: </strong>Introduced MLA, improvement alone ~80%+ reduction in KV cache memory requirements (compared to comparable GQA). 
When combined with other memory optimizations, DeepSeek claims 93.3% reduction in KV cache.</li><li><strong>New form of MoE: </strong>Introduced more flexible form of MoE with shared experts while still using auxiliary loss for load balancing</li><li>With these improvements, compared to a dense 67B model, DeepSeek claims ~578% inference throughput improvement.</li></ol><p><br></p><ul><li><strong>JULY 31, 2024:</strong> <a href="https://arxiv.org/abs/2407.21783" rel="noopener noreferrer" target="_blank">Llama 3</a>: META continuing their scaling program to 405B while increasing to 15T of tokens trained.&nbsp;</li><li><strong>DECEMBER 27, 2024:</strong> <a href="https://arxiv.org/abs/2412.19437v1" rel="noopener noreferrer" target="_blank">DeepSeek V3</a>: scaling up 15T run 671B (with 37B active) close to frontier models, GPT-4o, LLama 3 405B, Claude 3.5 sonnet.</li></ul><p>DeepSeek continues their drive for training and inference efficiencies, faced with chips constraints (primarily memory bandwidth, as H800’s have ½ the bandwidth of H100’s)... the path now is clearly their own:</p><ol><li><strong>Improved MoE training:</strong> without auxiliary loss (learned routing, vs. 
heuristic)</li><li><strong>Multi-token prediction objective during training:</strong> taking a lead from speculative decoding, though not used in inference</li><li><strong>DualPipe training pipelining/scheduling framework: </strong>reduces bubble and communication bottlenecks during training</li><li><strong>FP8 training dynamics:</strong> while accumulating in full precision without substantial loss in quality to reduce memory bottlenecks</li><li><strong>NVIDIA SM’s allocation adjustments: </strong>Low level adjustment to allocate SM’s only for communication to reduce bandwidth bottleneck</li><li><strong>Modular inference infra [4]: </strong>2 inference infra setups specialized in (A) prompt processing (input tokens) and then (B) sampling (output tokens) to separate and scale to fit workloads and optimize distributed batch processing at scale.&nbsp;</li></ol><ul><li><strong>JANUARY 22, 2025: </strong><a href="https://arxiv.org/abs/2501.12948" rel="noopener noreferrer" target="_blank"><strong>DeepSeek R1</strong></a><strong>: </strong>R1 model built on top of DeepSeek V3, close to OpenAI o1 performance and enables test-time compute regime</li></ul><p><br></p><p><br></p><h2>LLM development is a compounding phenomenon. </h2><p><br></p><p>Labs leverage their last generation of models and build on top of them. This is seen countless times again and again.&nbsp;</p><ul><li>Mixtral 8x7B is a MoE of their 7B (rumored).&nbsp;</li><li>Llama 3 models use Llama 2 models for filtering and data curation.&nbsp;</li><li>Gemini 1.5 teams used Gemini 1.0 generation models for evaluations, data curation, and hyperparameter extrapolations. 
Gemini 1.5 Flash distilled from Gemini 1.5 Pro.</li><li>DeepSeek V3 used DeepSeek V2.5 models for data generation in post-training.&nbsp;</li><li>DeepSeek R1 is built on top of V3 base model.</li></ul><p><br></p><p>DeepSeek V2 release in May 2024 was the beginning of DeepSeek charting its own path, rather than simply imitating others.</p><p>Its V3 release very much cemented their self-sufficiency in continuing to progress LLM research. From DualPipe, multi-token prediction, FP8 training dynamics with little loss in quality, and finally to providing 2 full pages of hardware “suggestions” to “hardware designers” (read NVIDIA), these are not behaviors of imitators.&nbsp;</p><p><br></p><h1><strong>R1: Reinforcement learning and test-time compute will accelerate inference demands</strong></h1><p><br></p><p>DeepSeek R1 claims to be on par with OpenAI o1/3 in benchmarks. LLM expert users are already <a href="https://simonwillison.net/2025/Jan/20/deepseek-r1/" rel="noopener noreferrer" target="_blank">impressed</a> and running these models on different infra and model configurations (DeepSeek released R1 distillations down to 1.5B size, which can run on most modern MacBook Pro’s with 16+GB memory).&nbsp;</p><p><br></p><p>As reinforcement learning will require some method of evaluation (to give feedback to the system whether something is correct or not), inference as a type of workload will only continue to increase.&nbsp;</p><p><br></p><p>For OpenAI, in the middle of 2024 (before the release of o1), inference costs are already <a href="https://www.datacenterdynamics.com/en/news/openai-training-and-inference-costs-could-reach-7bn-for-2024-ai-startup-set-to-lose-5bn-report/" rel="noopener noreferrer" target="_blank">dominating</a> training costs (~$7B in compute costs, ~$4B for inference towards ChatGPT). 
This shift from training to inference workloads will accelerate, to include inference workloads targeted for research for test-time compute regime models (evaluations, sample generation, etc..).</p><h1><br></h1><h1><strong>Hundreds of Billions of CapEx in 2024-2026: Will only accelerate model scale ups and intelligence progress</strong></h1><p><br></p><blockquote>"I will say that Deep Learning has a legendary ravenous appetite for compute, like no other algorithm that has ever been developed in AI. You may not always be utilizing it fully but I would never bet against compute as the upper bound for achievable intelligence in the long run. Not just for an individual final training run, but also for the entire innovation / experimentation engine that silently underlies all the algorithmic innovations."</blockquote><p><br></p><p>– <a href="https://x.com/karpathy/status/1883941452738355376" rel="noopener noreferrer" target="_blank">Andrej Karpathy</a></p><p><br></p><p>With <a href="https://github.com/hkust-nlp/simpleRL-reason" rel="noopener noreferrer" target="_blank">open</a> <a href="https://huggingface.co/blog/open-r1" rel="noopener noreferrer" target="_blank">replications</a> of R1 findings either in progress or complete, inference demands will only accelerate. Any model can become a reasoning model; where you can adjust a “knob” to increase test-time compute to get a more reliable answer.</p><p><br></p><p>Labs will be more comfortable scaling up when inference is cheaper with new forms of attention (MLA) and MoE’s. The test-time compute regime gives consumers of models the power of accuracy as a function of cost.&nbsp;</p><p><br></p><p>How much do you care about a more accurate response for a given task? $1k? $6k? $15k? 
This can now be within your control.</p><p><br></p><p>---</p><p><br></p><p>[^1]:&nbsp;Whether it was META open sourcing their work with LLAMA 4, or the community finding the needle in the haystack of <a href="https://github.com/srush/awesome-o1" rel="noopener noreferrer" target="_blank">speculation/experiments</a>.</p><p><br></p><p>[^2]:&nbsp;See interviews from <a href="https://www.chinatalk.media/p/deepseek-from-hedge-fund-to-frontier" rel="noopener noreferrer" target="_blank">2023</a> and <a href="https://www.chinatalk.media/p/deepseek-ceo-interview-with-chinas" rel="noopener noreferrer" target="_blank">2024</a> with the High Flyer CEO Liang Wenfeng.</p><p><br></p><p>[^3]:&nbsp;See High Flyer AI blog <a href="https://www.high-flyer.cn/en/blog/" rel="noopener noreferrer" target="_blank">posts</a> going back to 2019</p><p><br></p><p>[^4]: This is probably not entirely <a href="https://www.cursor.com/blog/llama-inference#how-exactly-are-closed-source-models-cheaper" rel="noopener noreferrer" target="_blank">novel</a>, and already being done by most inference providers, prompt processing can be incredibly cheap and speed up inference when done with enough volume in the workload. Though DeepSeek’s wide MoE footprint will make extra benefit from this modular approach by increased parallelism across nodes (as shown by their minimal 40-node sampling setup).&nbsp;</p>]]>
    </description>
    <link>https://johnnyclee.com/i/q6bGDu4MqPr/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Embracing the new era of computing, communication ... and energy</title>
    <guid>EFlLO18J4MF</guid>
    <pubDate>Wed, 22 Jan 2025 20:33:55 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<blockquote>We're still in punchcard era of LLMs, designing prompts, copy pasting context around, hitting go, reading the thing, prompting occasionally. Pretty lame. If there are fewer than a few thousand tok/s of sustained throughput generated on my behalf do we even have AI</blockquote><p><br></p><p>- <a href="https://x.com/karpathy/status/1878896895839642040" rel="noopener noreferrer" target="_blank">Andrej Karpathy</a></p><p><br></p><h2>Making it easier to communicate -- Attention</h2><p><br></p><p>If we take a short journey back in time, generative AI (LLMs, diffusion models, etc..) started to emerge in the late 2010's, primarily driven by the clever introduction of attention into existing neural network architectures.</p><p><br></p><p>At the time in 2014, researchers were still trying to make deep learning work for <a href="https://arxiv.org/abs/1409.0473" rel="noopener noreferrer" target="_blank">machine</a> <a href="https://arxiv.org/pdf/1409.3215" rel="noopener noreferrer" target="_blank">translation</a>. Ultimately, attention was a clever mechanism to help models adopt a method of communication between parts of a sequence (whether its nodes within the same vector i.e. self-attention, or nodes across different vectors i.e. cross-attention, etc...). Since 2017 transformers formally appeared as a new neural network architecture, it has transformed into a powerful tool partially because it was possible to parallelize the computation of attention across the modern computing accelerators (GPUs, TPUs). Then, it was possible to make bigger models, and train them on more data... i.e. scaling pre-training.</p><p><br></p><p>Scaling large language models was a slow start and took almost half a decade: the GPT's (1-3), BERT, T5, LaMDA, PaLM, etc... 
from <a href="https://epoch.ai/data/large-scale-ai-models" rel="noopener noreferrer" target="_blank">2018 to 2022</a>.</p><p><br></p><p>But progress has been tremendous, and the quality of models has improved dramatically from new entrants and incumbents alike: GPT-4, GPT-4o, Claude, Gemini, Llama, Mistral, Cohere, Qwen, DeepSeek, O1, etc... until today.</p><p><br></p><h2>Measuring progress, the year of benchmarks and evals</h2><p><br></p><p>In addition to the cost of inference, measuring progress is becoming more difficult as the models are getting better and better and beating many contemporary benchmarks.</p><p><br></p><p>To measure a model's ability to reason with acquired knowledge from training, <a href="https://arxiv.org/abs/2009.03300" rel="noopener noreferrer" target="_blank">MMLU</a> was released in 2020. In 2024, models were reaching 90+% on MMLU. In 2024, <a href="https://arxiv.org/abs/2406.01574" rel="noopener noreferrer" target="_blank">MMLU Pro</a> was released to provide more difficult tasks, and in early 2025, models were already scoring 80+% on MMLU Pro.</p><p><br></p><p>For coding and software engineering, <a href="https://www.swebench.com" rel="noopener noreferrer" target="_blank">SWE-bench</a> was released as realistic real world software engineering tasks in 2024, and in early 2025, models were already scoring 70+% on SWE-bench.</p><p><br></p><p>Leading AI labs have always been transparent about engaging the community to create new novel benchmarks/evals to evaluate their models. Recently, some of these labs have been criticized for waiting to disclose <a href="https://techcrunch.com/2025/01/19/ai-benchmarking-organization-criticized-for-waiting-to-disclose-funding-from-openai/" rel="noopener noreferrer" target="_blank">funding</a> to fund new benchmarks. Personally, I doubt there was any bad intent.</p><p><br></p><p>It's clear that existing benchmarks are being saturated. Progress is increasingly subjective to the type of task and domain. 
2025 will be the year of evals; perhaps new standards and systematic methods will emerge between the labs and the community.</p><p><br></p><h2>Curse of success -- scaling inference supply to meet demand vs. research progress</h2><p><br></p><p>Frontier AI labs have had tremendous success in distributing their models to the public. OpenAI <a href="https://www.nytimes.com/2024/09/27/technology/openai-chatgpt-investors-funding.html" rel="noopener noreferrer" target="_blank">expects</a> $3.7B and $11.6B in sales in 2024 and 2025. Azure, AWS, GCP, Oracle Cloud, and others had fantastic AI-driven tailwinds in their cloud computing businesses [1].</p><p><br></p><p>For research organizations like OpenAI, there is a rising <a href="https://www.wsj.com/tech/ai/open-ai-division-for-profit-da26c24b" rel="noopener noreferrer" target="_blank">tension</a> between compute for inference vs. compute for research.</p><p><br></p><p>At the same time, the cost of measuring progress is becoming more difficult--and costly. At the end of 2024, for the o3 model's ARC benchmark, OpenAI spent at least $1M-2M [2] to run a single <a href="https://arcprize.org/blog/oai-o3-pub-breakthrough#:~:text=The%20amount%20of%20compute%20was%20roughly%20172x%20the%20low%2Dcompute%20configuration." rel="noopener noreferrer" target="_blank">benchmark</a> (once!).</p><p><br></p><p>Through model development, it's understood as a training run progresses, evals are run at intervals to measure progress of training. If we assumed that while training o3, it took 5 ablation experiments, for each experiment it took the equivalent of 10,000 training steps. Then, if the ARC benchmark was run at every 1,000 step, then it would have run 5 x 10 = 50 times. 50 x $1.5M is $75M (!!). While this is pure speculation in the details, it's a good proxy to understand the cost of measuring progress. 
For $75M (which is some X% of the cost of o3 development cost, as it <strong>does not</strong> include GPU hours for <strong>training</strong> the model), one can train a GPT-2 class (circa 2019 generation LLM) model ~110,000 times [3].</p><p><br></p><p>Yet, there's a lot of research progress to be excited about.</p><p><br></p><h3>Progressing transformer architectures</h3><p><br></p><p>Simply within the transformer architecture, DeepSeek's developments mostly from DeepSeek (<a href="https://arxiv.org/abs/2412.19437" rel="noopener noreferrer" target="_blank">V3</a> and <a href="https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf" rel="noopener noreferrer" target="_blank">R1</a>) in MLA (new form of attention), MoE routing without auxiliary loss (taking inspiration to allow models to learn routing vs. heuristics), multi token prediction (incorporating the insights from speculative decoding), RL pipeline for reasoning with R1 (progressing to the first open source test-time compute frontier class model) show that a small team that's not distracted with inference demands can accelerate the rate of experimentation from ideas [4]. Their DualPipe distributed training improvement shows GPU continued algorithmic improvements in resource allocation/scheduling to maximize hardware use (they also made other clever adaptations like FP8 mixed precision to reduce memory use, as H800 has lower memory resources).</p><p><br></p><p>Meta's findings with <a href="https://arxiv.org/abs/2412.09871" rel="noopener noreferrer" target="_blank">byte latent transformer</a> shows that there may be a path away from fixed tokenizers and vocabs and they can be learned. Tokenizers can be blamed for a lot of problems with LLMs (i.e. the infamous how many R's are in the strawberry question). 
This could also provide a path to consume more data in training without using heuristics to pre-process data into sequences.</p><p><br></p><p>DeepSeek's models and Meta's Llama both acknowledge using the previous generation's model to generate data to progress to the next generation of models, signs of compounding progress. Previous work compounds and yet open sourcing allows the entire community to progress at a lower global capital cost which accrues value to more of the community.</p><p><br></p><h3>Reducing compute burden for the same tasks and performance</h3><p><br></p><p>Other than continuing to optimize and progress the transformer architecture, there are other ways to reduce the compute burden for the same tasks and performance.</p><p><br></p><p>There are many teams working on a variety of bets, some cool examples:</p><p><br></p><ol><li>State space models (non-attention based architectures): <a href="https://github.com/state-spaces/mamba/" rel="noopener noreferrer" target="_blank">Mamba</a>, <a href="https://github.com/BlinkDL/RWKV-LM" rel="noopener noreferrer" target="_blank">rwkv</a>, etc.. to reduce the attention quadratic runtime complexity.</li><li>Extreme quantization: <a href="https://github.com/microsoft/BitNet" rel="noopener noreferrer" target="_blank">BitNet</a> models are 1.6 bit models (with 4 bit activations) which are much cheaper to run and can run on CPU's as matrix multiplications are removed.</li></ol><p><br></p><p>For the leading AI labs and hyperscalers, they are racing to build more data centers designed for AI workloads. In the short term, compute remains in a supply crunch (primarily due to chip shortages--see <a href="https://www.google.com/finance/quote/NVDA:NASDAQ?hl=en&amp;window=5Y" rel="noopener noreferrer" target="_blank">NVIDIA stock price</a>). 
Data center construction has <a href="https://thedailyshot.com/wp-content/uploads/US-Constr-Spend-Data-Centers2501030320.png" rel="noopener noreferrer" target="_blank">skyrocketed</a> with the inflow of capital. The next physical constraint will be energy.</p><p><br></p><h2>Energy and AI</h2><p><br></p><p>Vaclav Smil published a book in 2022 called "How the World Really Works: The Science Behind How We Got Here and Where We're Going". The short answer: energy. Taking aside how Smil may feel about the progress/solutions of the energy transition, it is clear that our modern world--particularly the developed world--runs on energy.</p><p><br></p><blockquote>An average inhabitant of the Earth nowadays has at their disposal nearly 700 times more useful energy than their ancestors had at the beginning of the 19th century.</blockquote><blockquote>[...]</blockquote><blockquote>Translating the last rate into more readily imaginable equivalents, it is as if an average Earthling has every year at their personal disposal about 800 kilograms (0.8 tons, or nearly six barrels) of crude oil, or about 1.5 tons of good bituminous coal. <strong>And when put in terms of physical labor, it is as if 60 adults would be working non-stop, day and night, for each average person; and for the inhabitants of affluent countries this equivalent of steadily laboring adults would be, depending on the specific country, mostly between 200 and 240.</strong></blockquote><p><br></p><p>-- Vaclav Smil, How the World Really Works (emphasis mine)</p><p><br></p><p>The continued adoption and development of AI will require more energy than ever.</p><p><br></p><p>In 2025, Microsoft announced $80B of <a href="https://blogs.microsoft.com/on-the-issues/2025/01/03/the-golden-opportunity-for-american-ai/" rel="noopener noreferrer" target="_blank">capex</a> for AI data centers. 
In January 2025, The White House, OpenAI, Softbank, and Oracle announced a <a href="https://apnews.com/article/trump-ai-openai-oracle-softbank-son-altman-ellison-be261f8a8ee07a0623d4170397348c41" rel="noopener noreferrer" target="_blank">$500B</a> investment in data centers and energy over the next 4 years. Amazon expected $75B in <a href="https://seekingalpha.com/article/4731865-amazon-com-inc-amzn-q3-2024-earnings-call-transcript#:~:text=Yeah%2C%20I'll%20take%20the%20capex%20part%20of%20that.%20As%20Brian%20said%20in%20his%20opening%20comments%2C%20we%20expect%20to%20spend%20about%20$75%20billion%20in%202024.%20I%20suspect%20we'll%20spend%20more%20than%20that%20in%202025.%20And%20the%20majority%20of%20it%20is%20for%20AWS%20and%20specifically%2C%20the%20increased%20bumps%20here%20are%20really%20driven%20by%20Generative%20AI." rel="noopener noreferrer" target="_blank">capex</a> in 2024 mostly related to AWS, only to grow in 2025. The trend is the same for other players like GCP, Meta, etc...</p><p><br></p><p>AI data centers are joining the ranks of other items in the energy transition (i.e. electric vehicles, etc...). Forecasts of AI energy demands vary widely (from ~2x to 5x current data center energy demands by 2030). BloombergNEF's Michael Liebreich lays out a more nuanced <a href="https://about.bnef.com/blog/liebreich-generative-ai-the-power-and-the-glory/" rel="noopener noreferrer" target="_blank">perspective</a> in this new generation of data center growth: we've seen this before but market dynamics, stakeholders governance, and energy consumption efficiency will all play a factor at moderating demand and supply.</p><p><br></p><p>Personally, I'm optimistic that physical constraints won't bottleneck the rate of progress derived from adding more compute. 
Capital is moving rapidly to balance the supply, and energy constraints are likely to be mitigated by more algorithm and hardware improvements (similar to the 2000's and 2010's as cloud computing adoption took off, when similar stakeholders cried out for more energy, but as a proportion of US energy demand, data center energy consumption grew relatively gradually due to more efficient hardware and software design).</p><p><br></p><h2>AI and the Physical World -- How Humans Communicate With Machines</h2><p><br></p><p>In 2025, in the developed world, most consumers spend their time on their phones and laptops. Let's look at how humans do a common task: shopping on Amazon's mobile app.</p><p><br></p><h3>Tapping on a phone screen: Shopping on Amazon</h3><p><br></p><p>Each user interaction follows a carefully orchestrated flow:</p><p><br></p><p>1. Physical Input → Mobile OS</p><ul><li>User taps or swipes generate touch events</li><li>OS interprets and routes events to the application layer</li></ul><p>2. App ↔ Server Communication</p><ul><li>App sends HTTP/TCP requests to backend servers (i.e. Amazon ecommerce backends with product listings, ads, etc...)</li><li>Servers process requests (database lookups, payment validation)</li><li>Communication is via the internet backbone (i.e. TCP/UDP connections) that route through a host of hardware and network software; not to mention the cryptography to ensure the data is secure.</li></ul><p>3. Server ↔ Services</p><ul><li>Servers coordinate with other systems (inventory, payments, authentication, etc...)</li><li>Data flows through multiple service layers to aggregate the information needed to respond to the user's request.</li></ul><p>4. 
Response → User Interface</p><ul><li>Results return to device via the internet backbone</li><li>App updates UI based on new state</li><li>User perceives change and decides next action</li></ul><p><br></p><p>Each loop is tightly scoped and optimized, typically requiring only 100,000 to 100,000,000 FLOPs [5] per iteration of this loop. The system is engineered for speed and responsiveness through these <strong>small, discrete steps</strong>, which are then repeated in very rapid succession hundreds or thousands of times in a normal user session.</p><p><br></p><h3>The Human Factor: Bearing the Cognitive Load</h3><p><br></p><p>While the visible computation is relatively lightweight, humans shoulder most of the cognitive burden:</p><p><br></p><p>1. Persistent and Context-Aware Computation</p><ul><li>Users must constantly perceive and interpret their environment</li><li>Humans translate intent into discrete interface actions</li></ul><p>2. Interface Navigation</p><ul><li>Humans learn and adapt to predetermined UI patterns</li><li>Users bridge gaps between their intent, environment, time, and available actions</li></ul><p><br></p><p>The system's efficiency comes from delegating most adaptive intelligence to the human user, keeping machine computation minimal but requiring significant human cognitive work.</p><p><br></p><p>Yet, the efficiency didn't come for free. On the other side of the user consuming the interface, the interface was designed by other humans on top of a stack of software and hardware to enable the loop: chipset, operating system, network, application, etc...</p><p><br></p><p>The fixed cost is amortized over the many users that consume the interface over its lifetime.</p><p><br></p><h3>LLM Inference: A Different Paradigm</h3><p><br></p><p>Large language models with 70B parameters take a contrasting approach, requiring 20,000,000,000,000 to 200,000,000,000,000 FLOPs [6] per inference - 200,000-2,000,000x more than typical app interactions. 
However, they enable open-ended, natural language communication in a single pass.</p><p><br></p><p>With a fixed defined vocabulary space, the model can consume sequences of arbitrary length (up to a fixed length) and output a sequence of arbitrary length (up to a fixed length).</p><p><br></p><p>This is unlike programming, where there are syntax constraints that will limit the runtime/execution of the program with the underlying stack.</p><p><br></p><h3>Can new AI systems balance the communication burden?</h3><p>The evolution of human-computer interaction may point to a future where we're gradually shifting cognitive load from humans to machines. Traditional interfaces require humans to:</p><p><br></p><ul><li>Learn specific interaction patterns</li><li>Maintain context and state</li><li>Translate high-level goals into discrete steps</li></ul><p><br></p><p>The integration of LLMs into system architectures could happen at multiple levels:</p><p><br></p><p>1. Application Layer</p><ul><li>LLMs could augment existing interfaces as an intelligent assistance layer</li><li>Natural language could complement rather than replace traditional UI elements</li><li>UI's can be fluid and generated on the fly depending on input from the user and the environment</li></ul><p>2. Framework Layer</p><ul><li>Web and mobile frameworks could incorporate LLM-powered components</li><li>Development tools could use LLMs to generate more adaptive interfaces easily</li></ul><p>3. System Layer</p><ul><li>Operating systems could employ LLMs for more intelligent resource management</li><li>System calls and memory allocation could become more context-aware (i.e. hardware aware, workload aware, etc...)</li><li>Kernel operations could adapt to usage patterns and requirements instead of using heuristics</li></ul><h3><br></h3><h3>Remembering attention is a form of communication</h3><p><br></p><p>Large language models are marvels of deep learning. 
Modern human language is excellent at compressing information. Attention mechanisms allowed machines to learn via communication between many many different nodes of a sequence in multidimensional spaces.</p><p><br></p><p>If we move the unit of analysis from language and sequences to humans and the physical world, where else could we leverage the added benefits of learned machine communication?</p><p><br></p><h2>In the LLM paradigm: Learned data processing vs. pre-determined data processing</h2><p><br></p><p>Since the explosion of AI, industry has been <a href="https://www.blackstone.com/insights/article/the-convergence-of-data-centers-and-power-a-generational-investment-opportunity-the-connection/#:~:text=Figure%201:%20Data%20Created%2C%20Consumed%20and%20Stored" rel="noopener noreferrer" target="_blank">predicting</a> the explosion of data generated/stored/consumed. At 2024 NeurIPS, Ilya Sutskever <a href="https://neurips.cc/virtual/2024/test-of-time/105032" rel="noopener noreferrer" target="_blank">predicted</a> that the era of pre-training is over, but data continues to be the fossil fuel of the AI era.</p><p><br></p><p>But if we extend the previous section's analysis of human-computer interaction, we can see that data today is mostly generated and consumed in small discrete steps/iterations. Media content is slightly more continuous (i.e. videos, audio) but the data are still discretely packaged, collected, and consumed at the user's direction.</p><p><br></p><p>Since 2024, the frontier models have also expanded in modalities (both input and output; image, audio, etc...) and inference speed (i.e. 
realtime voice/video API's from OpenAI and Google).</p><p><br></p><p>There are applications of transformers where the inputs and outputs are more continuous in nature, such as in the classic self-driving car example: Waymo Research's 2024 <a href="https://arxiv.org/pdf/2404.19531" rel="noopener noreferrer" target="_blank">work</a> on transformer driven trajectory prediction–taking realtime continuous perception and scene data inputs to predict future motion trajectories of objects in the environment.&nbsp;</p><p>Yet, the data is still discretized and processed into features and tokens to be made into a sequence for processing; researchers spent effort empirically to find the best way to process the data into a sequence to achieve the best performance.</p><p><br></p><p>To add new modalities and formats of data to models, models have to be re-trained or trained for longer with the new data in a different way. The vast majority of this is still determined by humans based on empirical heuristics.</p><p><br></p><p>Perhaps, this is the reason why I have a lot of excitement around ideas like Meta's <a href="https://arxiv.org/pdf/2412.09871" rel="noopener noreferrer" target="_blank">byte latent transformer</a>. All data can be encoded into a sequence of bytes, and the model can learn to process the data in a way that is optimal for the task. 
The caveat is that this will likely require larger compute budgets than labs are willing to experiment with (especially when the current methods are working well for their existing use cases and users).</p><p><br></p><h2>Other interesting questions/possibilities in the LLM paradigm</h2><p><br></p><p>Because LLMs are so good at taking arbitrary inputs and generating usable outputs, if computation budgets are not a concern, it's natural to ask how we can use LLMs to generate arbitrary outputs on the fly <strong>all the time</strong>.&nbsp;</p><p><br></p><p>Instead of having to hire an engineer to write web applications, we can use LLMs to generate the web application (this is also related to the idea that software agents will probably be the 1st proven class of agents from LLMs).</p><p><br></p><p>As opposed to the fixed cost / economies of scale dynamics of current software development, LLMs can perhaps increase the accessibility of software development to the masses; but at the same time change the cost dynamics of fixed vs variable costs in software.&nbsp;</p><p><br></p><p>This is unlikely to be adopted for all use-cases, there are many use-cases where fixed costs / economies of scale are still king (i.e. most of all of the web applications we use today). But there are certainly a long tail of un-designed bespoke software applications that can be invented (but was never invented because it had a user of 1, or the user was already doing it manually and charging a very high rate for the service, etc...).</p><p><br></p><p>Bespoke software will become more accessible; it already is an enormous market (think how much system integrators like Accenture make per year; they are the definition of providing a labor force to develop customized software for their corporate clients).</p><p><br></p><p>---</p><p><br></p><p>[1]: Azure and Oracle provide GPUs to OpenAI, while also providing LLM inferences services separately. 
AWS provides inference for open source models and Anthropic models, while powering Anthropic's GPU clusters. GCP likewise serves as a cloud provider for its own Gemini models and other open source models.</p><p><br></p><p>[2]: The <a href="https://arcprize.org/blog/oai-o3-pub-breakthrough#:~:text=Here%20are%20the%20results." rel="noopener noreferrer" target="_blank">results</a> reported retail costs of $2,012 and $6,677 ($8,689) for the low compute configuration, with a note the high compute configuration cost roughly 172x the low compute configuration. 172 times $8,689 is $1,494,508 (~$1.5M). Now, these are retail costs, so the actual costs can possibly be 25-50% lower when considering the actual cost to OpenAI with its optimized inference stack and wholesale hardware costs.</p><p><br></p><p>[3]: In 2024, Karpathy <a href="https://github.com/karpathy/llm.c/discussions/677" rel="noopener noreferrer" target="_blank">reproduced</a> GPT-2 using llm.c for $672 in 24 hours of training.</p><p><br></p><p>[4]: The DeepSeek team is quite secretive even within China, they are based in Hangzhou, and are not that well known even within China. They predominantly hire younger talent from domestic universities (vs. the approaches of other China AI labs that hire more senior talent from Western education backgrounds). There's an argument to be made that (behind the backdrop of GPU constraints and cost constraints), the added focus of a fast talented team enabled it to basically conduct more experiments and ablations to produce more empirically impactful results.</p><p><br></p><p>[5]: FLOPs here is being used as an umbrella term (for simplicity) to include integer operations, etc... that may occur when completing instructions. 
This is a very rough estimation that is not meant to be precise, there are many variables that could impact the estimate: cryptography costs, network memory movement costs, ML/recommendation systems, etc...</p><p><br></p><p>[6]: Again, this is a very rough estimation, with the assumption of 70B parameters and 100 to 1000 tokens of total tokens processed/generated. It can easily be much higher with greater context length and longer output.</p>]]>
    </description>
    <link>https://johnnyclee.com/i/EFlLO18J4MF/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>At the top of the learning curve for Generative AI</title>
    <guid>U1V0UNVs61a</guid>
    <pubDate>Fri, 15 Nov 2024 01:56:39 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h3><strong>The mass adoption of generative AI models has yet to arrive.</strong></h3><p><br></p><p>Its growth will be driven by fundamental factors as the technology starts its journey down the learning curve, lowering the cost in developing and deploying models towards every general application. In the next 3-7 years, generative AI models will be as commonplace as gradient boosting and decision tree models for prediction and classification tasks.&nbsp;</p><p><br></p><p>By way of a simple analogy about the retail industry:</p><p><br></p><p class="ql-indent-1">Within an apparel retailer, there are 2 critical business functions: branding &amp; marketing, supply chain &amp; distribution. High performance supply chain operations and distribution is often buttressed by the accuracy of the firm’s demand and supply forecasts. Over-forecasting demand could easily wipe out the firm’s quarterly profits. Under-forecasting demand will greatly limit growth.</p><p><br></p><p class="ql-indent-1">Prior to the arrival of modern sample and compute efficient ML techniques (i.e. XGBoost and random forest) for predicting time series data, leading global retailers (i.e. Nike, P&amp;G, LLBean) crafted demand and supply forecasting models by hand. Even in the early 2000’s and 2010’s, these firms hired enormous teams of business analysts and managers to fine-tune and manage supply chain and distribution forecasts.&nbsp;</p><p><br></p><p>Yet, by the late 2010’s, a novice computer programmer can easily train a highly accurate demand/supply forecast model (based on gradient descent) with a few lines of code and a small historical dataset. These models power the modern e-commerce world (i.e. Amazon, Walmart, etc..) predicting the demand and supply of their product lines to the minute and hour.</p><p><br></p><p>While the large language models have disrupted the world, the underlying mechanics and intuitions have not changed. 
The primary difference is that transformer-based LLM’s require substantially more (1) compute, and (2) data; many orders of magnitude more of both ingredients for the model to learn as generalized models.</p><p><br></p><h3><strong>The barriers to development will continue to decline.</strong></h3><p><br></p><p>Compute supply is currently constrained. Yet, chip supply is classically cyclical (over the past 40-60 years), and it will soon enter a cycle of oversupply. There will continue to be systems engineering constraint problems to be solved; but I am hopeful that the current rate of capital investment will be sufficient to unlock these barriers (i.e. the physical practicality of a $10B datacenter vs. a $100B datacenter in terms of connectivity, power, and management).</p><p><br></p><p>Data will be a more multi-faceted and complex problem; there are at least 3 factors: (1) specialized/fit-for-purpose data, (2) volume of world representational data, and (3) systems engineering to ease the distribution and ingestion of data in large-scale training.&nbsp;</p><p><br></p><p>Specialized data (#1) and volume of world representational data (#2) are somewhat orthogonal to each other, but it is important to understand them together.</p><p><br></p><p class="ql-indent-1">Specialized data (#1) is a pre-existing constraint of ML for current users and firms; nothing has fundamentally changed about whether a firm or user has high-quality proprietary data or not. If a firm has historical data relevant to its products, then it is absolutely in the best position to use that data to develop and deploy generative AI applications.</p><p><br></p><p class="ql-indent-1">World representational data (#2) is a continuous research problem. It is known that current datasets are not exhaustive and comprehensive of our world; yet there is some fear of “running out of data”. 
Capital continues to flow to firms which conduct foundational research for data formats, modalities, and techniques to represent our entire world.&nbsp;</p><p><br></p><p class="ql-indent-1">Yet, with the sliver of world representational data (mostly from the internet), leading research labs have already shown the tremendous potential of large models by the current releases. While there is likely some declining returns to scale in a continuous fashion, I am hopeful that there will be some step-level discontinuity improvement in this area from modalities and format research. For example, neural networks are notoriously bad at representing 3-dimensions, various tricks are utilized to navigate this problem; yet humans only experience the world in 3-dimensions. Humans learn representations with much greater sample efficiency.</p><p><br></p><p>Scaling up data and compute infrastructure (#3) is a classic systems problem that is being solved by firms like Databricks and leading AI labs. This is unlikely to be a primary constraint to value creation.</p><p><br></p><h3><strong>The accumulation of value creation from generative AI will accrue to the users and the rest of the global economy.</strong></h3><p><br></p><p>As these barriers decline, training and/or deploying generative AI models and applications will become commonplace. We are likely far from reaching the bottom of the learning curve for this category of technology. As a result, leading model builders today must rely on their distribution-driven economies of scale to grow the market and retain share.</p><p><br></p><p>Fortunately, because we are still at the top of the curve and in the early innings, there is still much to do to realize this vision. So much has yet to be built, and many opportunities lie ahead.</p><p><br></p>]]>
    </description>
    <link>https://johnnyclee.com/i/at-the-op-of-the-learning-curve-for-generative-ai-U1V0UNVs61a/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-2cf4dd340ff38d1e783de290c80d0c89.png"/>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Renting clothes in Japan, &quot;Speak Now&quot;, and Jobs</title>
    <guid>M5zdcAOltcG</guid>
    <pubDate>Sun, 09 Jul 2023 07:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h3>Recent fun news</h3><h3><br></h3><p>Save on baggage,&nbsp;<strong>rent clothes when visiting Japan, says Japan Airlines (JAL)</strong>. Travelers visiting Japan via JAL are being offered a new service called&nbsp;<a href="https://on.ft.com/46CRflX" rel="noopener noreferrer" target="_blank">“Any Wear, Anywhere”</a>&nbsp;to ditch their heavy baggage and pack lighter. Through the service, you can rent six tops and three bottoms for 6,000 yen, or about $45, for two weeks. The airline believes this experiment can reduce luggage weight per flight, leading to less fuel use.</p><p><strong>“Speak Now”: Taylor Swift</strong>&nbsp;continues to re-record her old albums, releasing the latest version on July 7, 2023. Ms. Swift has since re-recorded 3 out of the first 6 of her albums. Shamrock Holdings—a private equity firm owned by the estate of Roy E. Disney—bought the rights to those albums for $300M in 2020. Perhaps unsurprisingly, each release of a re-recorded album lifts Taylor’s entire catalog in the streaming charts (original and re-recorded). As music rights holders are paid by streaming counts,&nbsp;<strong>each release provides a large injection of music sales for both Ms. Swift and Shamrock Holdings.</strong></p><p><strong>Jobs report:</strong>&nbsp;U.S. job creation in June was 209,000, falling short of the projected 225,000. This marks the first time in 14 months that actual numbers fell short of consensus, according to&nbsp;<a href="https://twitter.com/bespokeinvest/status/1677296435115577345/photo/1" rel="noopener noreferrer" target="_blank">Bespoke Research</a>.</p><p><strong>Pension funds start unloading private equity assets.&nbsp;</strong>The New York State Teacher’s Retirement system is looking to unload&nbsp;<a href="https://www.bloomberg.com/news/articles/2023-06-28/ny-teachers-pension-looks-to-sell-6-billion-of-private-assets" rel="noopener noreferrer" target="_blank">$6 billion of assets</a>&nbsp;into the secondary market. 
Private equity funds are usually close-ended for 8-10 years, so these movements in secondaries suggest the smart money wants to take profits while the marks are still frothy.</p><p><br></p><h3>This week’s Speed Read</h3><h3><br></h3><p>1.&nbsp;&nbsp;&nbsp;&nbsp;<strong>Some updates on office commercial real estate (CRE):</strong>&nbsp;Office commercial properties have started an upward trend towards high delinquencies since Jan 2023, from&nbsp;<strong>~2% to ~4%.</strong></p><p>2.&nbsp;&nbsp;&nbsp;In Q1 2023, offices dominated CRE foreclosures (<strong>~63% of all CRE foreclosures</strong>).</p><p>3.&nbsp;&nbsp;&nbsp;&nbsp;<strong>Higher quality office spaces fare substantially better, with many metros (Boston, D.C., Manhattan) even continuing to see positive net absorption</strong>&nbsp;(incremental occupied square footage) since 2021.</p><p>4.&nbsp;&nbsp;&nbsp;CMBS (commercial mortgage-backed securities) market also signals similar discontinuities through bond spreads between higher credit rating (Triple-A) vs. 
lower credit rating (Triple-B) bonds.&nbsp;<strong>The spread is ~6%, which is nearing March 2020 pandemic levels.</strong></p><p>5.&nbsp;&nbsp;&nbsp;&nbsp;<strong>Weekly bankruptcy filings moving back up</strong>&nbsp;to near GFC (2008 global financial crisis) and March 2020 (pandemic) levels.</p><p>6.&nbsp;&nbsp;&nbsp;Inflation stubbornly not going away,&nbsp;<strong>with core PCE around ~4%.</strong></p><p>7.&nbsp;&nbsp;&nbsp;Wall Street continues to revise to higher inflation expectations in 2023 and 2024.&nbsp;<strong>2023 consensus estimate at ~4.4%</strong>.</p><p>8.&nbsp;&nbsp;&nbsp;Global temperatures hit another&nbsp;<strong>record high on July 6, 2023, 17.23 Celsius</strong>.</p><p><br></p><h3>Speed Read Charts</h3><h3><br></h3><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff332b41e-5f53-4c39-8263-bcc356ae088b_468x344.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff332b41e-5f53-4c39-8263-bcc356ae088b_468x344.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd46c958b-414a-4467-816b-55bd9c134415_468x349.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd46c958b-414a-4467-816b-55bd9c134415_468x349.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcce03ada-f221-4c4b-b21c-48d122d00f9c_468x263.png" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcce03ada-f221-4c4b-b21c-48d122d00f9c_468x263.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94512355-8f41-4eda-a857-abfdc4fa8991_468x333.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F94512355-8f41-4eda-a857-abfdc4fa8991_468x333.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2550e309-068d-4ac9-a962-ed1768b95b65_468x263.jpeg" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2550e309-068d-4ac9-a962-ed1768b95b65_468x263.jpeg"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F836fefe2-98ec-4cfa-a3b8-b46d89e9bf23_468x266.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F836fefe2-98ec-4cfa-a3b8-b46d89e9bf23_468x266.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50a23f44-36a2-4dfd-bbea-e03de74e3c3f_468x262.png" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50a23f44-36a2-4dfd-bbea-e03de74e3c3f_468x262.png"></a></p><p><br></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2050826-fd1b-4d7a-8b97-aa332a236a29_400x648.jpeg" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2050826-fd1b-4d7a-8b97-aa332a236a29_400x648.jpeg"></a></p><p><br></p><h3>This week’s fun fact</h3><p><br></p><p>On May 4, 2023, the Federal Reserve’s FOMC (federal open markets committee) increased the federal funds rate by 25 basis points to 5.00% to 5.25%.</p><p>As of Jul 9, 2023, the market has priced in a 93% probability that the committee will raise the range another 25 basis points at the Jul 26, 2023 meeting.</p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F365f1c65-1f57-48b8-9a4f-087d675abc48_977x489.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F365f1c65-1f57-48b8-9a4f-087d675abc48_977x489.png"></a></p>]]>
    </description>
    <link>https://johnnyclee.com/i/M5zdcAOltcG/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Bidenomics, AI money, problems for bankers</title>
    <guid>jMprKyVAifH</guid>
    <pubDate>Sun, 02 Jul 2023 07:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h3>Recent fun news</h3><h3><br></h3><p>This week, the White House unveiled a new push for “<strong>Bidenomics”&nbsp;</strong>(<a href="https://www.whitehouse.gov/wp-content/uploads/2023/06/WH-Dunn-Donilon-Memo-on-Bidenomics-2023.06.26.pdf" rel="noopener noreferrer" target="_blank">press memo</a>). In Philadelphia a few weeks ago, President Biden apparently&nbsp;<a href="https://www.whitehouse.gov/briefing-room/speeches-remarks/2023/06/17/remarks-by-president-biden-at-a-political-rally-hosted-by-union-members-philadelphia-pa/" rel="noopener noreferrer" target="_blank">said</a>&nbsp;he does not know “what the hell it is.” From the memo, it mentions jobs, middle class, infrastructure, and it keeps saying it is working?&nbsp;</p><p><strong>More money for chips and NVIDIA.</strong>&nbsp;Money continues to flow to generative AI startups making foundational models. MosaicML&nbsp;<a href="https://www.wsj.com/articles/databricks-strikes-1-3-billion-deal-for-generative-ai-startup-mosaicml-fdcefc06?st=z16s8l4wgrumu08&amp;reflink=desktopwebshare_permalink" rel="noopener noreferrer" target="_blank">exited</a>&nbsp;for $1.3B to Databricks. 
Inflection AI&nbsp;<a href="https://tcrn.ch/3Xy2doX" rel="noopener noreferrer" target="_blank">announced</a>&nbsp;finishing a $1.3B round of funding, valuing the company at $4B.&nbsp;</p><p>Cumulative funding raised by foundational model startups:</p><ol><li>OpenAI $11.3B</li><li>Inflection AI $1.525B</li><li>Anthropic $1.5B</li><li>Cohere $445M</li><li>Adept $415M</li><li>Runway $237M</li><li>Character.ai $150M</li><li>Stability AI $100M</li></ol><p><strong>Orcas have been intentionally ramming into ships on the ocean.&nbsp;</strong>The&nbsp;<a href="https://www.smithsonianmag.com/smart-news/orca-rams-into-yacht-near-scotland-behavior-may-be-spreading-180982429/" rel="noopener noreferrer" target="_blank">behavior&nbsp;</a>has spread from the seas around the Iberian peninsula to the North Sea between Scotland and Norway, almost 2,000 miles away. Many theories, no answers.</p><p><strong>Wall Street continues to downsize.</strong>&nbsp;Goldman Sachs cuts&nbsp;<a href="https://www.bloomberg.com/news/articles/2023-06-23/goldman-sachs-begins-cutting-about-125-of-its-managing-directors?utm_source=website&amp;utm_medium=share&amp;utm_campaign=copy" rel="noopener noreferrer" target="_blank">125 Managing Directors</a>&nbsp;globally. JPMorgan cuts&nbsp;<a href="https://www.bloomberg.com/news/articles/2023-06-23/jpmorgan-cuts-40-dealmakers-in-north-america-amid-global-cull?utm_source=website&amp;utm_medium=share&amp;utm_campaign=copy" rel="noopener noreferrer" target="_blank">40 dealmakers</a>&nbsp;in North America. 
The Swiss bank UBS plans to cut&nbsp;<a href="https://www.bloomberg.com/news/articles/2023-06-27/ubs-preparing-to-cut-more-than-half-of-credit-suisse-workforce#xj4y7vzkg" rel="noopener noreferrer" target="_blank">30,000 jobs</a>&nbsp;this year.</p><p>Move aside Silicon Valley Bank, FDIC data shows&nbsp;<strong>Bank of America</strong>’s&nbsp;<a href="https://www.ft.com/content/df4f343c-5666-43a2-ba01-ef315bfb119a?shareType=nongift" rel="noopener noreferrer" target="_blank">~$100B in paper losses</a>&nbsp;from its bond investments made during the pandemic-driven deposit increase. By comparison, SVB had a ~$16B&nbsp;<a href="https://www.wsj.com/articles/rising-interest-rates-hit-banks-bond-holdings-11668123473?st=osvzvt2db0ge4y7&amp;reflink=desktopwebshare_permalink" rel="noopener noreferrer" target="_blank">loss</a>&nbsp;in its held-to-maturity portfolio. Bank of America’s bet on securities is leading it to trail its leading competitor (see number 7 in Speed Read), JPMorgan—who decided to sit on its pandemic cash instead.</p><p><br></p><p>Thanks for reading Myriad Perspectives! Subscribe for free to receive new posts and support my work.</p><p><br></p><p>Subscribed</p><h3>This week’s Speed Read</h3><h3><br></h3><p>1.&nbsp;&nbsp;&nbsp;&nbsp;The&nbsp;<strong>monthly mortgage payment</strong>&nbsp;for a new average purchase loan size is close to&nbsp;<strong>$3,000</strong>, nearly 2x the ~$1,000 to ~$1,500 range seen from 2000 to 2019.</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;Every&nbsp;<strong>single inflation forecast</strong>&nbsp;from the&nbsp;<strong>Federal Reserve</strong>&nbsp;since June 2021 has been wrong.</p><p>3.&nbsp;&nbsp;&nbsp;&nbsp;In emerging countries (e.g. Brazil, India, Mexico), the&nbsp;<strong>cost of capital to build renewable energy infrastructure</strong>&nbsp;(e.g. a solar farm) is&nbsp;<strong>~2x higher</strong>&nbsp;than in developed countries—EU’s ~4% cost of capital vs. 
India’s ~10% cost of capital.</p><p>4.&nbsp;&nbsp;&nbsp;&nbsp;For the first time, in Q1 2023,&nbsp;<strong>China</strong>&nbsp;topped Japan to be&nbsp;<strong>the world’s largest exporter of automobiles</strong>.</p><p>5.&nbsp;&nbsp;&nbsp;&nbsp;Morgan Stanley thinks the US will see its&nbsp;<strong>first negative payroll month</strong>&nbsp;around the end of this summer (August/September).</p><p>6.&nbsp;&nbsp;&nbsp;&nbsp;Since the start of the Ukraine war,&nbsp;<strong>~25% of Russia’s crude oil</strong>&nbsp;has continued to flow to Western countries in&nbsp;<strong>the EU, the US, and the UK</strong>, through the conduits of Turkey, India, UAE, and China.</p><p>7.&nbsp;&nbsp;&nbsp;&nbsp;In Q1 2023,&nbsp;<strong>Bank of America’s net interest rate spread</strong>&nbsp;(the difference between the interest rate it earns from its loans and assets, and the interest rate it pays to depositors) was&nbsp;<strong>1.43%</strong>&nbsp;compared to JPMorgan’s&nbsp;<strong>2.04%</strong>&nbsp;(42% higher).</p><p><br></p><h3>Speed Read Charts</h3><h3><br></h3><ol><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86088427-d7b6-4145-b00d-eff5a40f5dc1_468x311.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86088427-d7b6-4145-b00d-eff5a40f5dc1_468x311.png"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F685ec985-c3bb-44e9-bb20-088cb8c029c0_1280x720.jpeg" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F685ec985-c3bb-44e9-bb20-088cb8c029c0_1280x720.jpeg"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81840d91-2d61-4731-bc8d-833a38c91327_443x579.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F81840d91-2d61-4731-bc8d-833a38c91327_443x579.png"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5fc0280c-2d28-4cc4-a57e-a8cb3674a001_468x610.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5fc0280c-2d28-4cc4-a57e-a8cb3674a001_468x610.png"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21c6f033-84fb-4595-982f-abc37883c958_468x397.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21c6f033-84fb-4595-982f-abc37883c958_468x397.png"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4176ff87-2704-4539-a9ff-7f4fd8275145_468x624.png" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4176ff87-2704-4539-a9ff-7f4fd8275145_468x624.png"></a></li><li><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79b2e25c-d30d-4be6-a949-9d8049804868_468x624.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79b2e25c-d30d-4be6-a949-9d8049804868_468x624.png"></a></li></ol><p><br></p><h3>This week’s fun fact</h3><h3><br></h3><p>France has laid&nbsp;<a href="https://ig.ft.com/subsea-cables/" rel="noopener noreferrer" target="_blank">the most undersea cables</a>, closely followed by the United States.</p><p>Each of these countries have laid more than 500 thousand kilometers of cables, more than&nbsp;<strong>1.3x the distance from Earth to the Moon</strong>.</p>]]>
    </description>
    <link>https://johnnyclee.com/i/jMprKyVAifH/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Taylor Swift boon for hotels? Masayoshi Son is back.</title>
    <guid>K_tkflEDv57</guid>
    <pubDate>Sun, 25 Jun 2023 07:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<h3>Recent fun news</h3><p>Taylor Swift tours are very good for hotels, termed the&nbsp;<strong>“TSwift Lift”</strong>. ~80% revenue bump in Nashville, and 10-20% bump in large metros like Chicago and NYC. I guess scalpers are not the only winners. San Francisco hotel operators can rejoice once Ms. Swift arrives at Levi Stadium later in July.</p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F294b5949-10c7-4975-8d36-1946ef76db46_711x772.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F294b5949-10c7-4975-8d36-1946ef76db46_711x772.png"></a></p><p><strong>Masayoshi Son returns</strong>&nbsp;with his signature hyped slides at this year’s Softbank shareholder meeting. AI will solve everything? Find its full glory&nbsp;<a href="https://group.softbank/system/files/pdf/ir/investors/shareholders/2023/shareholders-meeting_43_04_en.pdf" rel="noopener noreferrer" target="_blank">here</a>.</p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F74b63987-f45a-455e-9924-c2e8692b9793_1215x885.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F74b63987-f45a-455e-9924-c2e8692b9793_1215x885.png"></a></p><p>Thanks for reading Myriad Perspectives! 
Subscribe for free to receive new posts and support my work.</p><p><br></p><h3>This week’s Speed Read</h3><p>1.&nbsp;&nbsp;&nbsp;&nbsp;Commercial credit card rates topped&nbsp;<strong>20.1%</strong>, highest since ~1970.</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;New business applications remain elevated at&nbsp;<strong>~175%</strong>&nbsp;pre-pandemic levels.</p><p>3.&nbsp;&nbsp;&nbsp;&nbsp;U.S. home listings continue to sink to&nbsp;<strong>lowest level on record</strong>&nbsp;since ~2012.</p><p>4.&nbsp;&nbsp;&nbsp;&nbsp;Uneven home price level effects across regions: SF Bay Area generally&nbsp;<strong>down ~12%</strong>, whereas NYC&nbsp;<strong>neutral ~0%</strong>, Miami&nbsp;<strong>up 8%.</strong></p><p>5.&nbsp;&nbsp;&nbsp;&nbsp;~80% chance&nbsp;<strong>moderate</strong>&nbsp;El Nino, ~50% chance&nbsp;<strong>strong</strong>&nbsp;El Nino this winter.</p><p>6.&nbsp;&nbsp;&nbsp;&nbsp;Global sea surface temperatures have been&nbsp;<strong>abnormally high</strong>&nbsp;(+1.1 degrees from 40-year average) in 2023 so far.</p><p>7.&nbsp;&nbsp;&nbsp;Since Medicaid started unwinding COVID-19 renewal provisions,&nbsp;<strong>~1.5M enrollees</strong>&nbsp;have been disenrolled (~300k in Florida, ~150k in Arizona).</p><h3>Speed Read Charts</h3><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7922c15-92f3-4c7e-95ff-305c3e2589c6_564x451.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7922c15-92f3-4c7e-95ff-305c3e2589c6_564x451.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9df656c5-6db0-47d8-9757-d73a7c02ac52_800x800.png" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9df656c5-6db0-47d8-9757-d73a7c02ac52_800x800.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5e51bd6-f2f7-4db3-92be-b22ee5489ea4_1024x717.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5e51bd6-f2f7-4db3-92be-b22ee5489ea4_1024x717.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d7d98f6-edae-487c-b771-a52990e5338e_683x328.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d7d98f6-edae-487c-b771-a52990e5338e_683x328.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe651dde4-73b7-478f-89b1-1df4dc57feb8_564x296.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe651dde4-73b7-478f-89b1-1df4dc57feb8_564x296.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F34f86589-9323-4765-8427-a2efbc208fe8_564x398.png" rel="noopener noreferrer" target="_blank"><img 
src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F34f86589-9323-4765-8427-a2efbc208fe8_564x398.png"></a></p><p><a href="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F003b69fb-395c-4ab0-afa2-0e9fbbaa8c24_468x367.png" rel="noopener noreferrer" target="_blank"><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F003b69fb-395c-4ab0-afa2-0e9fbbaa8c24_468x367.png"></a></p><h3>This week’s fun fact</h3><p>Ensign Peak Advisors, Inc manages $124B in assets for the Church of Latter-day Saints, also known as the Mormon Church.</p>]]>
    </description>
    <link>https://johnnyclee.com/i/K_tkflEDv57/</link>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Full faith and credit of the United States</title>
    <guid>QFVlOIRffdo</guid>
    <pubDate>Fri, 26 May 2023 05:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<p>The strange world of a potential default on U.S. Treasuries</p><p><br></p><p><strong>Risk-free rate</strong></p><p>In 1964, William F. Sharpe introduced a theory of capital asset pricing[1], commonly referred to as the Capital Asset Pricing Model (CAPM). Today, CAPM is the foundation for modern financial asset price theory[2]. For his work, Professor Sharpe of Stanford University received the 1990 Nobel Prize in Economics for his foundational contribution to financial economics[3].</p><p>The CAPM assumes the existence of a type of riskless asset. If an investor invests in a riskless asset, the investor is guaranteed to receive a rate of return, called the risk-free rate.</p><p>This risk-free rate is a key input in the pricing of nearly all types of financial assets, including stocks, debt, options, and real estate. A fundamental assumption in valuations, the cost of capital is calculated from an assumed risk-free rate.</p><p>While choosing the best risk-free rate is quite literally an academic exercise[4], the global financial system, for the most part, uses U.S. treasury yields[5]&nbsp;as proxies for the risk-free rate.</p><p>The global financial system assumes U.S. treasury debt is a type of riskless asset. In financial lingo, U.S. treasuries have zero credit risk. The investor assumes they will always receive their money on time:</p><ul><li>Risk of U.S. treasuries defaulting = 0</li></ul><p><br></p><p><strong>So, what if risk &gt; 0?</strong></p><p>The United States federal government has never defaulted on its debt. Such an event has never been observed by the world. It will be unprecedented.</p><p>Since January 19, 2023, the United States federal government has been unable to issue more debt beyond its $31.381 trillion statutory limit[6]. As a result, the Department of the Treasury projects that the federal government will run out of money on June 1, 2023[7]. 
If the debt limit is not raised by then, the United States will default on its debt.</p><p>The frightening reality is simple. The world likely will still refer to U.S. treasuries as the predominant proxy for riskless assets.</p><p>As a result, if the U.S. defaults on its debt, it is quite likely that investors will buy more U.S. treasuries.</p><p>Why? In a world of risky assets, U.S. treasuries will remain the safest type of asset, despite a default. In financial uncertainty, asset managers will run to safe assets, which ironically will be U.S. treasuries[8].</p><p><br></p><p><strong>Will markets function?</strong></p><p>Beyond the heavy financial pricing theory talk, if the U.S. government defaults, there are simple questions that are fundamental to the largest and most liquid capital market in the world: the market for U.S. treasuries.</p><ul><li>Will investors get advance notice of a certain delay in payment? Will it be 1 day or more?</li><li>When may payments resume? Will the Treasury Department provide compensation for defaulted Treasuries?</li><li>Will the Federal Reserve accept defaulted Treasuries as collateral?</li><li>Will other banks and parties continue to accept defaulted Treasuries as collateral?</li><li>Which types of Treasuries will default first?</li><li>Are defaulted Treasuries transferable?</li><li>Will money market funds be forced to sell Treasuries in the event of a default?</li></ul><p>While some banks[9]&nbsp;seem to offer answers to their clients on these questions, it is my hope that we never have to find out whether their answers were correct.</p><p>In 2015, the Swiss National Bank suddenly stopped pegging the Swiss franc (CHF) to the Euro (EUR).</p><p>Some computers at trading firms continued to assume:</p><ul><li>1 CHF = 1 EUR.</li></ul><p>Calamity ensued, instantly bankrupting firms due to lack of funds and/or liquidity[10].</p><p>Let us hope Congress decides to pay its debts, and we do not have to find out how many computers hard 
coded:</p><ul><li>Risk of U.S. treasuries defaulting = 0</li></ul><p><br></p><p>[1]&nbsp;<a href="https://doi.org/10.1111/j.1540-6261.1964.tb02865.x" rel="noopener noreferrer" target="_blank">https://doi.org/10.1111/j.1540-6261.1964.tb02865.x</a></p><p>[2]&nbsp;This is not to say CAPM is completely accurate. Notably, in 1993, Fama and French from the University of Chicago identified other factors that should be considered besides the risk of the market portfolio.&nbsp;<a href="https://doi.org/10.1016/0304-405X(93)90023-5" rel="noopener noreferrer" target="_blank">https://doi.org/10.1016/0304-405X(93)90023-5</a></p><p>[3]&nbsp;<a href="https://www.nobelprize.org/prizes/economic-sciences/1990/press-release/" rel="noopener noreferrer" target="_blank">https://www.nobelprize.org/prizes/economic-sciences/1990/press-release/</a></p><p>[4]&nbsp;You can peruse Google Scholar for the mountain of work debating the best risk-free rate in every circumstance.</p><p>[5]&nbsp;<a href="https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&amp;field_tdr_date_value_month=202305" rel="noopener noreferrer" target="_blank">https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&amp;field_tdr_date_value_month=202305</a></p><p>[6]&nbsp;<a href="https://home.treasury.gov/system/files/136/Debt-Limit-Letter-to-Congress-20230119-McCarthy.pdf" rel="noopener noreferrer" target="_blank">https://home.treasury.gov/system/files/136/Debt-Limit-Letter-to-Congress-20230119-McCarthy.pdf</a></p><p>[7]&nbsp;<a href="https://home.treasury.gov/system/files/136/Debt-Limit-Letter-to-Congress-Members-20230522-McCarthy.pdf" rel="noopener noreferrer" target="_blank">https://home.treasury.gov/system/files/136/Debt-Limit-Letter-to-Congress-Members-20230522-McCarthy.pdf</a></p><p>[8]&nbsp;Not just my word, the word of a survey of asset managers conducted by JPMorgan Chase, as part of “Q&amp;A 
on a US Treasury Technical Default” (May 19, 2023, JPMorgan Chase Fixed Income). Excerpt can be found on&nbsp;<a href="https://www.ft.com/content/e4639f71-b69f-4d99-867d-dffd1733779d" rel="noopener noreferrer" target="_blank">https://www.ft.com/content/e4639f71-b69f-4d99-867d-dffd1733779d</a></p><p>[9]&nbsp;<a href="https://www.bloomberg.com/opinion/articles/2023-05-23/can-markets-handle-the-debt-ceiling" rel="noopener noreferrer" target="_blank">https://www.bloomberg.com/opinion/articles/2023-05-23/can-markets-handle-the-debt-ceiling</a></p><p>[10]&nbsp;<a href="https://www.wsj.com/articles/snb-shocks-bankers-and-markets-1421342951" rel="noopener noreferrer" target="_blank">https://www.wsj.com/articles/snb-shocks-bankers-and-markets-1421342951</a></p>]]>
    </description>
    <link>https://johnnyclee.com/i/QFVlOIRffdo/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-965274e0caf18b4cc8f90bb09fffaad1.jpg"/>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
  <item>
    <title>Responsible AI adoption at the world’s most systemically important bank</title>
    <guid>Ubx0xYoOW8I</guid>
    <pubDate>Sat, 06 May 2023 15:00:00 GMT</pubDate>
    <itunes:explicit>false</itunes:explicit>
    <description>
      <![CDATA[<p><strong>1&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Background</strong></p><p><br></p><p>Since the debut of OpenAI’s ChatGPT in November 2022, much of the world has been enamored with the astonishing and—sometimes—frightening capabilities of artificial intelligence. Public and private entities are racing to understand the technology’s implications on society and business.&nbsp;</p><p><br></p><p>On May 4, 2023, the Biden administration summoned executives from leading AI companies to the White House to discuss ways to mitigate potential harms of AI[1]. At the same time, the administration’s director of CISA (Jen Easterly) and director of cybersecurity at the NSA (Rob Joyce) both called AI a game-changing and era-defining technology posing new cyber challenges to the world[2]. Director Easterly, testifying to the House Homeland Security Committee, compared AI to the same level of threat and challenge as China[3].</p><p><br></p><p>In the private sector, while many companies are rapidly adapting to the AI-enabled features and operating models, many others are starting to disclose in SEC filings that AI technology is a fundamental risk factor to their existing business models[4]. On May 2, 2023, the threat of ChatGPT to Chegg’s underlying education-technology business model led to a 38% selloff of their shares[5]. Clearly, the impact of AI technology will not be equal for all parties involved.</p><p><br></p><p>Timely with recent banking turmoil due to tightening credit conditions in the U.S. 
financial system, this document will attempt to analyze the benefits and risks from adopting AI at the world’s most systemically important bank, JPMorgan Chase.</p><p><br></p><p><strong>2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;JPMorgan Chase</strong></p><p><br></p><p>Since the Global Financial Crisis of 2008, the G20's Financial Stability Board has published an annual ranking of systemically important banks. In 2022, JPMorgan Chase remained at the top of that list, alone in the highest capital buffer requirement category[6]. The global financial system is incredibly interconnected, with the banking sector working closely with global capital markets to provide credit and liquidity to the world. Because the world’s banks mostly operate as a fractional reserve banking system[7], large bank failures can have catastrophic contagion effects on the world.</p><p><br></p><p>At the end of March 31, 2023, JPMorgan Chase had $3.74 trillion in assets, including $1.13 trillion in issued loans. Its clients have $2.38 trillion of deposits at the bank[8]. By comparison, the United States Department of Defense had $3.52 trillion in assets at the end of Sept 2022[9].</p><p><br></p><p>JPMorgan Chase is already investing in leveraging AI technology similarly to ChatGPT, known as large language models (LLM’s)[10].</p><p><br></p><p><strong>3&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;AI: Large language models</strong></p><p><br></p><p>To unpack the AI buzzword, it is important to understand the intuition behind the technology powering AI chatbots. These chatbots are powered by one or more large language models (LLMs).&nbsp;</p><p><br></p><p><strong>3.1&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;What are large language models?</strong></p><p><br></p><p>Perhaps unsurprisingly, AI is not magic, it is mostly made of many linear calculations[11]. 
A large language model is a type of neural network machine learning model. It is trained to understand words and their linguistic structure (“language”). By converting words into numbers, it can reduce linguistic patterns into statistics by training on hundreds of billions of words from written documents. These models can capture dense information and linguistic structures in billions of parameters—hence they are “large” language models.&nbsp;</p><p><br></p><p>Most importantly, recent advances in LLMs rely primarily on a type of neural network architecture called “Transformers”, which was open sourced by Google in a 2017 paper called “Attention Is All You Need”[12]. For more details on how these models are trained, The Economist recently wrote a wonderful layperson’s explanation of large language models[13].</p><p><br></p><p>Intuitively, your interaction with an AI chatbot can be broken down into these steps:</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;User enters a message to the chatbot. [Input: Words]</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;The model converts the words into numbers, which represent the words. [Input: Words; Output: Matrix of numbers]</p><p>3.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;The model uses the matrix of numbers to sequentially predict the “next” most likely number(s) (which are just a representation of words). [Input: Numbers; Output: Numbers]</p><p>4.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;The model converts the predicted numbers back into words for the reply! [Input: Numbers; Output: Words]</p><p><br></p><p><br></p><p><strong>3.2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Beyond Chatbots</strong></p><p><br></p><p>While consumers are most familiar with the chatbot application of language models, language models have been in your lives for many years. Google’s helpful predictive tools in Gmail, Google Docs, and Google Sheets are all powered by language models in some form[14]. 
You simply had no chat interface to the model.</p><p><br></p><p>Beyond chatbots, LLMs are highly generalizable. Most recently, Columbia Business School professor Dan Wang[15]&nbsp;compared LLMs’ generalizability to electricity. It may be a horizontal layer of generalized compute that can enable many not-yet-created inventions—like how electricity led to the microprocessor.&nbsp;</p><p><br></p><p>An early glimpse into the generalizable capabilities of LLMs (beyond text-generation) can be seen in Microsoft’s recent paper called HuggingGPT[16]. The paper demonstrates that LLMs like GPT-4 can be used to create and orchestrate tasks to be done by other AI models and computers. Empowered, the model no longer has to rely solely on its own abilities.&nbsp;&nbsp;</p><p><br></p><p><strong>4&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Beneficial applications in banking</strong></p><p><br></p><p>In the case of JPMorgan Chase, there is likely a long list of potential AI use-cases. 
Instead of an exhaustive review, we will simply focus on use-cases that are intuitively easier to understand.</p><p><br></p><p><strong>4.1&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Internal usage</strong></p><p><br></p><p>Internal usage of AI at a firm like JPMorgan can be framed in two ways: directly applied on core business processes, or indirectly applied by employees responsible for business processes.</p><p><br></p><p>Here are examples of direct applications (processing previously too-complex documents):</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Loan underwriting: LLMs can process complex loan/mortgage applications directly in the underwriting process.</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Know your customer (KYC): LLMs can discover attributes and generate customer profiles from internal and external records to comply with KYC regulations.</p><p><br></p><p>Here are examples of indirect applications (empowering employees):</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Summarization: Employees can use LLMs to generate regular internal updates and reports.</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Data entry: Employees can use LLMs to automate their daily data-entry tasks.</p><p><br></p><p><strong>4.2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Client service usage</strong></p><p><br></p><p>Beyond internal usage, client services usage is probably the most exciting, as banks are primarily financial services firms. 
JPMorgan Chase can leverage LLMs to create new products for its clients and interact with clients in whole new ways.</p><p><br></p><p>Examples of new products:</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;AI wealth management product that delivers portfolio-level market reports daily</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;AI private banker that can do complex tasks like managing mortgage application and closing processes (previously only available to HNW clients of the private bank)</p><p><br></p><p>Examples of new ways to interact with clients:</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Investment bankers can use 1,000s of previous pitchbooks to generate the most compelling pitchbook for the next debt/equity offering.</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Client service can rely on AI chatbots as opposed to a call center agent.</p><p><strong>&nbsp;</strong></p><p><strong>5&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Firm-level risks</strong></p><p><br></p><p>While the potential of LLMs for JPMorgan Chase seems unlimited, there are also important risks to consider at the firm level. Primarily, two types of risks are at the forefront: data security and global compliance.</p><p><br></p><p><strong>5.1&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Data security&nbsp;</strong></p><p><br></p><p>Trust is the most important attribute for a bank, especially at the world’s most systemically important bank. Clients rely on the firm not only to protect their personal data, but also to protect the data that underlies their cash deposits. In the digital age, cash deposits are simply bits of data that are stored in a very trusted and reliable database. If anything were to happen to that database, then everyone’s deposits may be at risk.&nbsp;</p><p><br></p><p>Therefore, banking data and client information at the firm are likely frequently targeted by adversaries. These adversaries may be seeking financial and/or political gain. 
A firm like JPMorgan likely faces adversaries that have enormous capabilities, like state actors.&nbsp;</p><p><br></p><p>The usage of LLMs presents a new challenge to maintaining the firm’s data security standards. While LLMs seem powerful, they are also very nascent. Their technological vulnerabilities are only beginning to be studied by system experts[17].</p><p><br></p><p>Deployed prematurely without proper safeguards, LLMs can lead to poor performance (at best) or data loss and manipulation (at worst). For example:</p><p>1.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Poor underwriting performance: It may be possible to manipulate loan documents to inject specific words that can “trick” an LLM processing loan applications into underwriting loans below acceptable credit guidelines.&nbsp;</p><p>2.&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Data loss and manipulation: Criminals may inject malicious code and prompts into known repositories of LLM training and input data. Then, the malicious code and prompts may lead to arbitrary code execution in key banking data processes. This may lead to data loss, manipulation, or theft.</p><p><br></p><p><br></p><p><strong>5.2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Global compliance</strong></p><p><br></p><p>JPMorgan is a global multinational financial institution. In addition to United States banking regulations, it must comply with the regulations in all regions in which it operates. These regulations may be financial in nature, or they may be related to corporate, commercial, or information regulations.&nbsp;</p><p><br></p><p>Facing an already complex web of compliance requirements, JPMorgan has historically spent billions of dollars annually to keep up with requirements[18]. Most recently, it was hit with a $200 million fine for lapses in compliance in keeping WhatsApp records[19]. 
Suffice it to say, compliance is a major risk factor for the firm.</p><p><br></p><p>In the United States, there exist nearly zero guidelines or regulators for the usage of AI. The regulatory bodies are only beginning to catch on[20]. The firm faces regulatory risks of investing in AI technology that will later be banned or severely restricted.&nbsp;</p><p><br></p><p>Internationally, the differential regulatory landscape may make it difficult for the firm to keep up with the complexity of regulations. Some European authorities (e.g., Italy) are already whimsical in their authority and approach[21].</p><p><br></p><p><strong>6&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Systemic risks</strong></p><p><br></p><p>In addition to firm-level risks, JPMorgan will have to contend with systemic risks if it suffers failures or breaches because of its AI usage. Because the firm holds more than 10% of all American banking deposits, a loss of confidence in it can and will lead to widespread panic.&nbsp;</p><p><br></p><p>Bank runs are primarily caused by panic. A bank run on JPMorgan Chase will lead to bank runs on the world’s banks.&nbsp;</p><p><br></p><p>This will have unprecedented consequences, so severe the world may never adequately prepare for such an event. Banks are designated “too big to fail” precisely because it is not possible to prepare for their failures.</p><p><br></p><p><strong>7&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Closing thoughts</strong></p><p><br></p><p>The rapid adoption of AI technologies by private and public actors is likely to fundamentally change society and business. But its impacts will not be equal across the board.&nbsp;</p><p><br></p><p>As discussed, in the public sector, authorities continue to be ill-prepared to adequately balance the risk and reward calculus of AI on society. 
The White House is scrambling to catch up.</p><p><br></p><p>In the private sector, winners (e.g., Microsoft) and losers (e.g., Chegg) are already apparent. There will be many more winners, and no doubt, many more losers.&nbsp;</p><p><br></p><p>Systemic actors like JPMorgan Chase have a responsibility to lead by example and with caution. Financial services, an industry built on information technology, can easily be disrupted and lead to short-term chaos. JPMorgan’s leaders should balance the firm’s desire to continue to lead with its responsibility as the world’s most important bank.</p><p><br></p><p>[1]&nbsp;<a href="https://www.whitehouse.gov/briefing-room/press-briefings/2023/05/04/background-press-call-on-new-artificial-intelligence-announcements/" rel="noopener noreferrer" target="_blank">https://www.whitehouse.gov/briefing-room/press-briefings/2023/05/04/background-press-call-on-new-artificial-intelligence-announcements/</a></p><p>[2]&nbsp;<a href="https://www.washingtonpost.com/politics/2023/05/02/us-officials-say-ai-will-be-big-cyberthreat-how-itll-materialize-is-less-clear/" rel="noopener noreferrer" target="_blank">https://www.washingtonpost.com/politics/2023/05/02/us-officials-say-ai-will-be-big-cyberthreat-how-itll-materialize-is-less-clear/</a></p><p>[3]&nbsp;<a href="https://www.c-span.org/video/?527701-1/cyber-director-testifies-threats-landscape" rel="noopener noreferrer" target="_blank">https://www.c-span.org/video/?527701-1/cyber-director-testifies-threats-landscape</a></p><p>[4]&nbsp;<a href="https://9fin.com/insights/chatgpt-is-the-new-risk-factor-buzzword" rel="noopener noreferrer" target="_blank">https://9fin.com/insights/chatgpt-is-the-new-risk-factor-buzzword</a></p><p>[5]&nbsp;<a href="https://www.bloomberg.com/news/articles/2023-05-02/chegg-plunges-after-warning-that-ai-is-threatening-its-business" rel="noopener noreferrer" 
target="_blank">https://www.bloomberg.com/news/articles/2023-05-02/chegg-plunges-after-warning-that-ai-is-threatening-its-business</a></p><p>[6]&nbsp;<a href="https://www.reuters.com/business/finance/jp-morgan-remains-worlds-biggest-systemically-important-bank-2022-11-21/" rel="noopener noreferrer" target="_blank">https://www.reuters.com/business/finance/jp-morgan-remains-worlds-biggest-systemically-important-bank-2022-11-21/</a></p><p>[7]&nbsp;<a href="https://www.philadelphiafed.org/-/media/frbp/assets/institutional/education/lesson-plans/purposes-and-functions-grades-9-12.pdf" rel="noopener noreferrer" target="_blank">https://www.philadelphiafed.org/-/media/frbp/assets/institutional/education/lesson-plans/purposes-and-functions-grades-9-12.pdf</a></p><p>[8]&nbsp;<a href="https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/1st-quarter/88617d8a-a183-45a7-acd9-eea77b439879.pdf" rel="noopener noreferrer" target="_blank">https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/1st-quarter/88617d8a-a183-45a7-acd9-eea77b439879.pdf</a></p><p>[9]&nbsp;<a href="https://comptroller.defense.gov/Portals/45/Documents/afr/fy2022/4-Financial_Section.pdf" rel="noopener noreferrer" target="_blank">https://comptroller.defense.gov/Portals/45/Documents/afr/fy2022/4-Financial_Section.pdf</a></p><p>[10]&nbsp;<a href="https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/events/2023/jpmc-investor-day-2023/global-technology.pdf" rel="noopener noreferrer" target="_blank">https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/events/2023/jpmc-investor-day-2023/global-technology.pdf</a></p><p>[11]&nbsp;Linear calculations are like linear equations (i.e., y = 5x + 6; if x = 5, y = 31).</p><p>[12]&nbsp;<a href="https://arxiv.org/pdf/1706.03762.pdf" rel="noopener noreferrer" 
target="_blank">https://arxiv.org/pdf/1706.03762.pdf</a></p><p>[13]&nbsp;<a href="https://www.economist.com/interactive/science-and-technology/2023/04/22/large-creative-ai-models-will-transform-how-we-live-and-work" rel="noopener noreferrer" target="_blank">https://www.economist.com/interactive/science-and-technology/2023/04/22/large-creative-ai-models-will-transform-how-we-live-and-work</a></p><p>[14]&nbsp;<a href="https://ai.googleblog.com/2018/05/smart-compose-using-neural-networks-to.html" rel="noopener noreferrer" target="_blank">https://ai.googleblog.com/2018/05/smart-compose-using-neural-networks-to.html</a></p><p>[15]&nbsp;<a href="https://www8.gsb.columbia.edu/cbs-directory/detail/djw2104" rel="noopener noreferrer" target="_blank">https://www8.gsb.columbia.edu/cbs-directory/detail/djw2104</a></p><p>[16]&nbsp;<a href="https://github.com/microsoft/JARVIS" rel="noopener noreferrer" target="_blank">https://github.com/microsoft/JARVIS</a></p><p>[17]&nbsp;<a href="https://simonwillison.net/2023/Apr/14/worst-that-can-happen/" rel="noopener noreferrer" target="_blank">https://simonwillison.net/2023/Apr/14/worst-that-can-happen/</a></p><p>[18]&nbsp;<a href="https://www.wsj.com/articles/SB10001424127887324755104579071304170686532" rel="noopener noreferrer" target="_blank">https://www.wsj.com/articles/SB10001424127887324755104579071304170686532</a></p><p>[19]&nbsp;<a href="https://www.wsj.com/articles/jpmorgan-admits-widespread-record-keeping-breakdown-in-sec-settlement-11639747801" rel="noopener noreferrer" target="_blank">https://www.wsj.com/articles/jpmorgan-admits-widespread-record-keeping-breakdown-in-sec-settlement-11639747801</a></p><p>[20]&nbsp;<a href="https://www.whitehouse.gov/briefing-room/statements-releases/2023/05/04/fact-sheet-biden-harris-administration-announces-new-actions-to-promote-responsible-ai-innovation-that-protects-americans-rights-and-safety/" rel="noopener noreferrer" 
target="_blank">https://www.whitehouse.gov/briefing-room/statements-releases/2023/05/04/fact-sheet-biden-harris-administration-announces-new-actions-to-promote-responsible-ai-innovation-that-protects-americans-rights-and-safety/</a></p><p>[21]&nbsp;<a href="https://www.reuters.com/technology/italys-data-watchdog-chatgpt-can-resume-april-30-if-openai-takes-useful-steps-2023-04-18/" rel="noopener noreferrer" target="_blank">https://www.reuters.com/technology/italys-data-watchdog-chatgpt-can-resume-april-30-if-openai-takes-useful-steps-2023-04-18/</a></p>]]>
    </description>
    <link>https://johnnyclee.com/i/Ubx0xYoOW8I/</link>
    <itunes:image href="https://cdn.johnnyclee.com/main-johnnyclee-com/production/images/item-2da602c11de89d0bde975bfa603fe804.jpg"/>
    <itunes:episodeType>full</itunes:episodeType>
  </item>
</channel>
</rss>