Getting data from WRDS#

Wharton Research Data Services (WRDS) is a comprehensive data platform widely used in academic and professional research for finance, economics, and business. It provides access to an extensive collection of datasets, including stock prices, financial statements, and market data from sources like CRSP, Compustat, and more.

import wrds

db = wrds.Connection()
Loading library list...
Done

You will need an account to access WRDS. Instructions for creating an account are available here. The first step is to fill out this form using the Class Code provided on Canvas.

The first time you log in from a computer, you can select “Y” to create a .pgpass file, which will remember your password for future logins. This file contains your personal login information, so don’t save it on a computer that isn’t yours and don’t share it with anyone.

WRDS provides access to many databases. The Kelley School has subscriptions to many of these but not all. We can list all the sources like this:

db.list_libraries()
['aha_sample',
 'ahasamp',
 'audit',
 'audit_acct_os',
 'audit_audit_comp',
 'audit_common',
 'audit_corp_legal',
 'auditsmp',
 'auditsmp_all',
 'bank',
 'bank_all',
 'bank_premium_samp',
 'banksamp',
 'block',
 'block_all',
 'boardex',
 'boardex_na',
 'boardex_trial',
 'boardsmp',
 'bvd_amadeus_trial',
 'bvd_bvdbankf_trial',
 'bvd_orbis_trial',
 'bvdsamp',
 'calcbench_trial',
 'calcbnch',
 'candid_samp',
 'cboe',
 'cboe_all',
 'cboe_sample',
 'cboesamp',
 'cddsamp',
 'ciq',
 'ciq_common',
 'ciqsamp',
 'ciqsamp_capstrct',
 'ciqsamp_common',
 'ciqsamp_keydev',
 'ciqsamp_pplintel',
 'ciqsamp_ratings',
 'ciqsamp_transactions',
 'ciqsamp_transcripts',
 'cisdmsmp',
 'columnar',
 'comp',
 'comp_bank_daily',
 'comp_execucomp',
 'comp_global_daily',
 'comp_na_daily_all',
 'comp_segments_hist_daily',
 'compsamp',
 'compsamp_all',
 'compsamp_snapshot',
 'compseg',
 'contrib',
 'contrib_as_filed_financials',
 'contrib_ceo_turnover',
 'contrib_char_returns',
 'contrib_corporate_culture',
 'contrib_general',
 'contrib_global_factor',
 'contrib_intangible_value',
 'contrib_kpss',
 'contrib_liquidity_taq',
 'contrib_liva',
 'crsp',
 'crsp_a_ccm',
 'crsp_a_indexes',
 'crsp_a_stock',
 'crsp_a_treasuries',
 'crsp_a_ziman',
 'crsp_q_mutualfunds',
 'crspsamp',
 'crspsamp_all',
 'crspsamp_mf',
 'csmsamp_all',
 'dealscan',
 'djones',
 'djones_all',
 'dmef',
 'dmef_all',
 'doe',
 'doe_all',
 'etfg_samp',
 'etfgsamp',
 'eventus',
 'evts_all',
 'execcomp',
 'factsamp_all',
 'factsamp_revere',
 'ff',
 'ff_all',
 'fisdsamp',
 'fisdsamp_all',
 'fjc',
 'fjc_linking',
 'fjc_litigation',
 'frb',
 'frb_all',
 'fssamp',
 'ftsesamp',
 'ftsesamp_russell_us',
 'gutenberg',
 'hfrsamp',
 'hfrsamp_hfrdb',
 'ibes',
 'ibessamp_kpi',
 'ifgrsamp',
 'ims_obp_trial',
 'imssamp',
 'infasamp',
 'infogroupsamp_business',
 'infogroupsamp_residential',
 'infraclear_samp',
 'insdsamp',
 'iri',
 'iri_all',
 'issm_nasd1990',
 'issm_nasd1991',
 'issm_nasd1992',
 'issm_nyam1990',
 'issm_nyam1991',
 'issm_nyam1992',
 'kpisamp',
 'macrofin',
 'macrofin_comm_trade',
 'mfl',
 'mflinks_all',
 'midas',
 'morningstarsamp_cisdm',
 'mpsych_sample',
 'mrktsamp',
 'mrktsamp_cds',
 'mrktsamp_cdx',
 'mrktsamp_msf',
 'msci_esg_samp',
 'msciesmp',
 'msrb',
 'msrb_all',
 'msrbsamp',
 'msrbsamp_all',
 'omtrial',
 'optionm',
 'optionm_all',
 'optionmsamp_europe',
 'optionmsamp_us',
 'otc',
 'otc_endofday',
 'phlx',
 'phlx_all',
 'pitchsmp',
 'preqsamp',
 'preqsamp_all',
 'public',
 'public_all',
 'pwt',
 'pwt_all',
 'ravenpack_common',
 'ravenpack_dj',
 'ravenpack_full',
 'ravenpack_trial',
 'ravenpack_web',
 'reprisk',
 'reprisk_common',
 'reprisk_pm',
 'reprisk_sample',
 'reprisk_v2',
 'repsamp',
 'revelio_samp',
 'revsamp',
 'risk',
 'risk_directors',
 'risk_governance',
 'risksamp',
 'risksamp_all',
 'rpna',
 'rpnasamp',
 'rq_all',
 'rstat_samp',
 'rstatsmp',
 'sdcsamp',
 'secsamp',
 'secsamp_all',
 'shvlsamp',
 'snapsamp',
 'snlsamp',
 'snlsamp_fig',
 'sp_esg',
 'sustainalyticssamp_all',
 'sustsamp',
 'taqm_2003',
 'taqm_2004',
 'taqm_2005',
 'taqm_2006',
 'taqm_2007',
 'taqm_2008',
 'taqm_2009',
 'taqm_2010',
 'taqm_2011',
 'taqm_2012',
 'taqm_2013',
 'taqm_2014',
 'taqm_2015',
 'taqm_2016',
 'taqm_2017',
 'taqm_2018',
 'taqm_2019',
 'taqm_2020',
 'taqm_2021',
 'taqm_2022',
 'taqm_2023',
 'taqm_2024',
 'taqm_2025',
 'taqmsamp',
 'taqmsamp_all',
 'taqmsec',
 'taqsamp',
 'taqsamp_all',
 'tfn',
 'totalq',
 'totalq_all',
 'tr_13f',
 'tr_common',
 'tr_dealscan',
 'tr_ds_comds',
 'tr_ds_econ',
 'tr_ds_equities',
 'tr_ds_fut',
 'tr_ibes',
 'tr_ibes_guidance',
 'tr_insiders',
 'tr_mutualfunds',
 'tr_sdc_samples',
 'trace',
 'trace_enhanced',
 'trace_standard',
 'trcommon',
 'trcstsmp',
 'trdbdmismp',
 'trdbwbsmp',
 'trdssamp',
 'trdstrm',
 'tresgsmp',
 'trsamp',
 'trsamp_all',
 'trsamp_db_dmi',
 'trsamp_db_wb',
 'trsamp_ds_eq',
 'trsamp_dscom',
 'trsamp_dsecon',
 'trsamp_dsfut',
 'trsamp_esg',
 'trsamp_sdc_ma',
 'trsamp_sdc_ni',
 'trsamp_worldscope',
 'trucost',
 'trucost_common',
 'trucost_samp',
 'twoiq_samp',
 'twoiqsmp',
 'wappsamp',
 'wenvsmp',
 'wmfsmp',
 'wrds_environmental_samp',
 'wrds_insiders_samp',
 'wrds_lib_internal',
 'wrds_mutualfund_samp',
 'wrds_sec_search',
 'wrds_shortvolume_samp',
 'wrdsapps',
 'wrdsapps_backtest_basic',
 'wrdsapps_backtest_plus',
 'wrdsapps_eushort',
 'wrdsapps_evtstudy_int',
 'wrdsapps_evtstudy_int_ginsight',
 'wrdsapps_evtstudy_intraday_rav',
 'wrdsapps_evtstudy_lr',
 'wrdsapps_evtstudy_us',
 'wrdsapps_finratio',
 'wrdsapps_finratio_ccm',
 'wrdsapps_finratio_ibes',
 'wrdsapps_finratio_ibes_ccm',
 'wrdsapps_link_comp_eushort',
 'wrdsapps_link_crsp_bond',
 'wrdsapps_link_crsp_comp_bdx',
 'wrdsapps_link_crsp_factset',
 'wrdsapps_link_crsp_ibes',
 'wrdsapps_link_crsp_optionm',
 'wrdsapps_link_crsp_taq',
 'wrdsapps_link_crsp_taqm',
 'wrdsapps_link_supplychain',
 'wrdsapps_link_trinsider_bdx',
 'wrdsapps_patents',
 'wrdsapps_plink_boardex_trinsider',
 'wrdsapps_plink_exec_boardex',
 'wrdsapps_plink_exec_trinsider',
 'wrdsapps_subsidiary',
 'wrdsapps_windices',
 'wrdsappssamp_all',
 'wrdssec',
 'wrdssec_all',
 'wrdssec_bow',
 'wrdssec_common',
 'wrdssec_insiders',
 'wrdssec_midas',
 'wrdssec_secsa',
 'zacksamp',
 'zacksamp_all']

Most databases have different tables, each containing information in difference format. Typically, we will need to join information from different tables into one table to perform the desired analysis.

# list all the tables in the CRSP library

db.list_tables(library='crsp')
['acti',
 'asia',
 'asib',
 'asic',
 'asio',
 'asix',
 'bmdebt',
 'bmheader',
 'bmpaymts',
 'bmquotes',
 'bmyield',
 'bndprt06',
 'bndprt12',
 'bxcalind',
 'bxdlyind',
 'bxmthind',
 'bxquotes',
 'bxyield',
 'cap',
 'ccm_lookup',
 'ccm_qvards',
 'ccmxpf_linktable',
 'ccmxpf_lnkhist',
 'ccmxpf_lnkrng',
 'ccmxpf_lnkused',
 'comphead',
 'comphist',
 'compmaster',
 'contact_info',
 'core',
 'crsp_cik_map',
 'crsp_daily_data',
 'crsp_header',
 'crsp_monthly_data',
 'crsp_names',
 'crsp_portno_map',
 'crsp_ziman_daily_index',
 'crsp_ziman_monthly_index',
 'cs20yr',
 'cs5yr',
 'cs90d',
 'cst_hist',
 'daily_nav',
 'daily_nav_ret',
 'daily_returns',
 'dividends',
 'dport1',
 'dport2',
 'dport3',
 'dport4',
 'dport5',
 'dport6',
 'dport7',
 'dport8',
 'dport9',
 'dsbc',
 'dsbo',
 'dse',
 'dse62',
 'dse62delist',
 'dse62dist',
 'dse62exchdates',
 'dse62names',
 'dse62nasdin',
 'dse62shares',
 'dseall',
 'dseall62',
 'dsedelist',
 'dsedist',
 'dseexchdates',
 'dsenames',
 'dsenasdin',
 'dseshares',
 'dsf',
 'dsf62',
 'dsf62_v2',
 'dsf_v2',
 'dsfhdr',
 'dsfhdr62',
 'dsi',
 'dsi62',
 'dsia',
 'dsib',
 'dsic',
 'dsio',
 'dsir',
 'dsix',
 'dsiy',
 'dsp500',
 'dsp500_v2',
 'dsp500list',
 'dsp500list_v2',
 'dsp500p',
 'dssc',
 'dsso',
 'eod_cap',
 'eod_core',
 'eod_esg',
 'eod_sector',
 'eod_vg',
 'erdport1',
 'erdport2',
 'erdport3',
 'erdport4',
 'erdport5',
 'erdport6',
 'erdport7',
 'erdport8',
 'erdport9',
 'ermport1',
 'ermport2',
 'ermport3',
 'ermport4',
 'ermport5',
 'esg',
 'fbpri',
 'fbyld',
 'front_load',
 'front_load_det',
 'front_load_grp',
 'fund_fees',
 'fund_flows',
 'fund_hdr',
 'fund_hdr_hist',
 'fund_names',
 'fund_style',
 'fund_summary',
 'fund_summary2',
 'fwdask06',
 'fwdask12',
 'fwdave06',
 'fwdave12',
 'fwdbid06',
 'fwdbid12',
 'hldask06',
 'hldask12',
 'hldave06',
 'hldave12',
 'hldbid06',
 'hldbid12',
 'holdings',
 'holdings_co_info',
 'idx_const_close_pf_v2',
 'idx_const_close_v2',
 'idx_const_open_pf_v2',
 'idx_const_open_proj_v2',
 'idx_const_open_v2',
 'idx_levels',
 'inddlyseriesdata',
 'inddlyseriesdata62',
 'inddlyseriesdata_ind',
 'index_descriptions',
 'index_type_map',
 'indfamilyinfohdr',
 'indfamilyinfohdr62',
 'indfamilyinfohdr_ind',
 'indissrebalancesummary_ind',
 'indmthseriesdata',
 'indmthseriesdata62',
 'indmthseriesdata_ind',
 'indsecrebalancesummary_ind',
 'indseriesinfohdr',
 'indseriesinfohdr62',
 'indseriesinfohdr_ind',
 'mbi',
 'mbmdat',
 'mbmhdr',
 'mbx',
 'mbxid',
 'mcti',
 'metacalendarperiod',
 'metacalendarperiod62',
 'metacalendarperiod_ind',
 'metacolumncoverage',
 'metacolumncoverage62',
 'metacolumncoverage_ind',
 'metacolumninfo',
 'metacolumninfo62',
 'metacolumninfo_ind',
 'metaexchangecalendar',
 'metaexchangecalendar62',
 'metaexchangecalendar_ind',
 'metafileinfo',
 'metafileinfo62',
 'metafileinfo_ind',
 'metaflagcoverage',
 'metaflagcoverage62',
 'metaflagcoverage_ind',
 'metaflaginfo',
 'metaflaginfo62',
 'metaflaginfo_ind',
 'metaflagtype',
 'metaflagtype62',
 'metaflagtype_ind',
 'metaiteminfo',
 'metaiteminfo62',
 'metaiteminfo_ind',
 'metasiztociz',
 'metasiztociz62',
 'metasiztociz_ind',
 'mfdbname',
 'mhista',
 'mhistn',
 'mhistq',
 'monthly_nav',
 'monthly_returns',
 'monthly_tna',
 'monthly_tna_ret_nav',
 'mport1',
 'mport2',
 'mport3',
 'mport4',
 'mport5',
 'mse',
 'mse62',
 'mse62delist',
 'mse62dist',
 'mse62exchdates',
 'mse62names',
 'mse62nasdin',
 'mse62shares',
 'mseall',
 'mseall62',
 'msedelist',
 'msedist',
 'mseexchdates',
 'msenames',
 'msenasdin',
 'mseshares',
 'msf',
 'msf62',
 'msf62_v2',
 'msf_v2',
 'msfhdr',
 'msfhdr62',
 'msi',
 'msi62',
 'msia',
 'msib',
 'msic',
 'msio',
 'msir',
 'msix',
 'msiy',
 'msp500',
 'msp500_v2',
 'msp500list',
 'msp500list_v2',
 'msp500p',
 'portnomap',
 'priask06',
 'priask12',
 'priave06',
 'priave12',
 'pribid06',
 'pribid12',
 'price_type',
 'property_type',
 'qcti',
 'qsia',
 'qsib',
 'qsic',
 'qsio',
 'qsix',
 'rear_load',
 'rear_load_det',
 'rear_load_grp',
 'rebala',
 'rebaln',
 'rebalq',
 'reit_type',
 'riskfree',
 's6z_agg_ann',
 's6z_agg_ann_legacy',
 's6z_agg_mth',
 's6z_agg_mth_legacy',
 's6z_agg_qtr',
 's6z_agg_qtr_legacy',
 's6z_del',
 's6z_del_legacy',
 's6z_dind',
 's6z_dind_legacy',
 's6z_dis',
 's6z_dis_legacy',
 's6z_dp_dly',
 's6z_dp_dly_legacy',
 's6z_ds_dly',
 's6z_ds_dly_legacy',
 's6z_hdr',
 's6z_hdr_legacy',
 's6z_indhdr',
 's6z_indhdr_legacy',
 's6z_mdel',
 's6z_mdel_legacy',
 's6z_mind',
 's6z_mind_legacy',
 's6z_mth',
 's6z_mth_legacy',
 's6z_nam',
 's6z_nam_legacy',
 's6z_ndi',
 's6z_ndi_legacy',
 's6z_shr',
 's6z_shr_legacy',
 'saz_agg_ann',
 'saz_agg_ann_legacy',
 'saz_agg_mth',
 'saz_agg_mth_legacy',
 'saz_agg_qtr',
 'saz_agg_qtr_legacy',
 'saz_del',
 'saz_del_legacy',
 'saz_dind',
 'saz_dind_legacy',
 'saz_dis',
 'saz_dis_legacy',
 'saz_dp_dly',
 'saz_dp_dly_legacy',
 'saz_ds_dly',
 'saz_ds_dly_legacy',
 'saz_hdr',
 'saz_hdr_legacy',
 'saz_indhdr',
 'saz_indhdr_legacy',
 'saz_mdel',
 'saz_mdel_legacy',
 'saz_mind',
 'saz_mind_legacy',
 'saz_mth',
 'saz_mth_legacy',
 'saz_nam',
 'saz_nam_legacy',
 'saz_ndi',
 'saz_ndi_legacy',
 'saz_shr',
 'saz_shr_legacy',
 'sechead',
 'sechist',
 'sector',
 'sfz_dind',
 'sfz_dind_legacy',
 'sfz_indhdr',
 'sfz_indhdr_legacy',
 'sfz_mbr',
 'sfz_mbr_legacy',
 'sfz_mind',
 'sfz_mind_legacy',
 'sfz_portd',
 'sfz_portd_legacy',
 'sfz_portm',
 'sfz_portm_legacy',
 'sfz_rb',
 'sfz_rb_legacy',
 'stkannsecuritydata',
 'stkannsecuritydata62',
 'stkdelists',
 'stkdelists62',
 'stkdistributions',
 'stkdistributions62',
 'stkdlycumulativeadjfactor',
 'stkdlycumulativeadjfactor62',
 'stkdlysecuritydata',
 'stkdlysecuritydata62',
 'stkdlysecurityprimarydata',
 'stkdlysecurityprimarydata62',
 'stkindissuerstatistics_ind',
 'stkindmembership_ind',
 'stkindsecuritystatistics_ind',
 'stkissuerinfohdr',
 'stkissuerinfohdr62',
 'stkissuerinfohist',
 'stkissuerinfohist62',
 'stkmthcumulativeadjfactor',
 'stkmthcumulativeadjfactor62',
 'stkmthfloatshares',
 'stkmthfloatshares62',
 'stkmthsecuritydata',
 'stkmthsecuritydata62',
 'stkqtrsecuritydata',
 'stkqtrsecuritydata62',
 'stksecurityinfohdr',
 'stksecurityinfohdr62',
 'stksecurityinfohist',
 'stksecurityinfohist62',
 'stkshares',
 'stkshares62',
 'stock_qvards',
 'stocknames',
 'stocknames62',
 'stocknames62_v2',
 'stocknames_v2',
 'sub_property_type',
 'tfz_dly',
 'tfz_dly_cd',
 'tfz_dly_cpi',
 'tfz_dly_ft',
 'tfz_dly_rf2',
 'tfz_dly_ts2',
 'tfz_idx',
 'tfz_iss',
 'tfz_mast',
 'tfz_mth',
 'tfz_mth_bp',
 'tfz_mth_cd',
 'tfz_mth_cpi',
 'tfz_mth_fb',
 'tfz_mth_ft',
 'tfz_mth_rf',
 'tfz_mth_rf2',
 'tfz_mth_ts',
 'tfz_mth_ts2',
 'tfz_pay',
 'vg',
 'wrds_dailyindexret62_query',
 'wrds_dailyindexret_query',
 'wrds_dsf62v2_query',
 'wrds_dsfv2_query',
 'wrds_inddlytranspose_query',
 'wrds_indmthtranspose_query',
 'wrds_monthlyindexret62_query',
 'wrds_monthlyindexret_query',
 'wrds_msf62v2_query',
 'wrds_msfv2_query',
 'wrds_names62_query',
 'wrds_names_query',
 'yldask06',
 'yldask12',
 'yldave06',
 'yldave12',
 'yldbid06',
 'yldbid12',
 'ziman_reit_info',
 'zr_hdrnames']

The dsf_v2 table contains daily information about stocks that trade on major U.S. markets. There’s a similar msf_v2 file that contains monthly data.

db.describe_table(library='crsp', table='dsf_v2')
Approximately 107661176 rows in crsp.dsf_v2.
name nullable type comment
0 permno True INTEGER PERMNO
1 hdrcusip True VARCHAR(8) Header CUSIP -8 Characters
2 permco True INTEGER PERMCO
3 siccd True INTEGER Sic Code
4 nasdissuno True INTEGER Nasdaq Issue Number
5 yyyymmdd True INTEGER YYYYMMDD - Daily Calendar Period Key
6 sharetype True VARCHAR(3) Share Type
7 securitytype True VARCHAR(4) Security Type
8 securitysubtype True VARCHAR(3) Security Sub-Type
9 usincflg True VARCHAR(1) US Incorporation Flag
10 issuertype True VARCHAR(4) Issuer Type
11 primaryexch True VARCHAR(1) Primary Exchange
12 conditionaltype True VARCHAR(3) Conditional Type
13 tradingstatusflg True VARCHAR(1) Trading Status Flag
14 dlycaldt True DATE Daily Calendar Date
15 dlydelflg True VARCHAR(1) Daily Delisting Flag
16 dlyprc True NUMERIC(13, 6) Daily Price
17 dlyprcflg True VARCHAR(2) Daily Price Flag
18 dlycap True NUMERIC(13, 2) Daily Capitalization
19 dlycapflg True VARCHAR(2) Daily Capitalization Flag
20 dlyprevprc True NUMERIC(13, 6) Daily Previous Price
21 dlyprevprcflg True VARCHAR(2) Daily Previous Price Flag
22 dlyprevdt True DATE Daily Previous Price Date
23 dlyprevcap True NUMERIC(13, 2) Daily Previous Capitalization
24 dlyprevcapflg True VARCHAR(2) Daily Previous Capitalization Flag
25 dlyret True NUMERIC(10, 6) Daily Total Return
26 dlyretx True NUMERIC(10, 6) Daily Price Return
27 dlyreti True NUMERIC(10, 6) Daily Income Return
28 dlyretmissflg True VARCHAR(2) Daily Return Missing Flag
29 dlyretdurflg True VARCHAR(2) Daily Return Duration Flag
30 dlyorddivamt True NUMERIC(13, 6) Daily Ordinary Dividend Amount
31 dlynonorddivamt True NUMERIC(13, 6) Daily Non-Ordinary Dividend Amount
32 dlyfacprc True NUMERIC(10, 6) Daily Factor To Adjust Price
33 dlydistretflg True VARCHAR(2) Daily Distribution Return Impact Flag
34 dlyvol True NUMERIC(14, 0) Daily Volume
35 dlyclose True NUMERIC(13, 6) Daily Close
36 dlylow True NUMERIC(13, 6) Daily Low
37 dlyhigh True NUMERIC(13, 6) Daily High
38 dlybid True NUMERIC(13, 6) Daily Bid
39 dlyask True NUMERIC(13, 6) Daily Ask
40 dlyopen True NUMERIC(13, 6) Daily Open
41 dlynumtrd True INTEGER Daily Number Of Trades
42 dlymmcnt True SMALLINT Daily Market Maker Count
43 dlyprcvol True NUMERIC(14, 1) Daily Price Volume
44 dlycumfacpr True NUMERIC(13, 6) Daily Cumulative Factor to Adjust Price
45 dlycumfacshr True NUMERIC(13, 6) Daily Cumulative Factor to Adjust Shares/Volume
46 cusip True VARCHAR(8) CUSIP
47 ticker True VARCHAR(5) Ticker
48 exchangetier True VARCHAR(3) Exchange Tier
49 shrout True INTEGER Shares Outstanding

Company header information#

These files refer to companies using a permno identifier. To find companies by the more traditional stock ticker—or their name—we can look at the wrds_name_query table.

db.describe_table('crsp', 'wrds_names_query')
Approximately 186251 rows in crsp.wrds_names_query.
name nullable type comment
0 permno True INTEGER PERMNO
1 hdrprimaryexch True VARCHAR(1) Header Primary Exchange
2 nasdissuno True INTEGER Nasdaq Issue Number
3 hdrsiccd True INTEGER Header SIC Code
4 permco True INTEGER PERMCO
5 secinfostartdt True DATE Security Information Start Date
6 secinfoenddt True DATE Security Information End Date
7 securitybegdt True DATE Begin Date of Stock Data
8 securityenddt True DATE End Date of Stock Data
9 cusip True VARCHAR(8) CUSIP
10 ticker True VARCHAR(5) Ticker
11 issuernm True VARCHAR(50) Issuer Name
12 shareclass True VARCHAR(1) Share Class
13 usincflg True VARCHAR(1) US Incorporation Flag
14 issuertype True VARCHAR(4) Issuer Type
15 securitytype True VARCHAR(4) Security Type
16 securitysubtype True VARCHAR(3) Security Sub-Type
17 sharetype True VARCHAR(3) Share Type
18 securityactiveflg True VARCHAR(1) Security Active Flag
19 siccd True INTEGER Sic Code
20 primaryexch True VARCHAR(1) Primary Exchange
21 tradingsymbol True VARCHAR(7) Trading Symbol
22 naics True VARCHAR(6) NAICS Code
23 tradingstatusflg True VARCHAR(1) Trading Status Flag
comps = db.get_table('crsp', 'wrds_names_query',
                     columns=['permno', 'ticker', 'secinfostartdt', 'secinfoenddt', 'siccd', 'issuernm'],
                     date_cols=['secinfostartdt', 'secinfoenddt'])

comps.fillna(np.nan, inplace=True)
comps.head()
permno ticker secinfostartdt secinfoenddt siccd issuernm
0 10000 OMFGA 1986-01-07 1986-12-03 3990 OPTIMUM MANUFACTURING INC
1 10000 OMFGA 1986-12-04 1987-03-09 3990 OPTIMUM MANUFACTURING INC
2 10000 OMFGA 1987-03-10 1987-06-11 3990 OPTIMUM MANUFACTURING INC
3 10000 <NA> 1987-06-12 1987-06-12 0 OPTIMUM MANUFACTURING INC (Last Known)
4 10001 GFGC 1986-01-09 1986-09-01 4920 GREAT FALLS GAS CO
comps[comps['ticker']=='AAPL']
permno ticker secinfostartdt secinfoenddt siccd issuernm
21651 14593 AAPL 1980-12-12 1982-03-31 3573 APPLE COMPUTER INC
21652 14593 AAPL 1982-04-01 1982-10-31 3573 APPLE COMPUTER INC
21653 14593 AAPL 1982-11-01 2004-06-09 3573 APPLE COMPUTER INC
21654 14593 AAPL 2004-06-10 2006-07-02 3573 APPLE COMPUTER INC
21655 14593 AAPL 2006-07-03 2007-01-10 3573 APPLE COMPUTER INC
21656 14593 AAPL 2007-01-11 2009-03-26 3571 APPLE INC
21657 14593 AAPL 2009-03-27 2017-12-27 3571 APPLE INC
21658 14593 AAPL 2017-12-28 2023-09-28 3571 APPLE INC
21659 14593 AAPL 2023-09-29 2023-09-30 3571 APPLE INC
21660 14593 AAPL 2023-10-01 2024-12-31 3571 APPLE INC
comps[comps['ticker']=='IBM']
permno ticker secinfostartdt secinfoenddt siccd issuernm
12932 12490 IBM 1962-07-02 1968-01-01 3573 INTERNATIONAL BUSINESS MACHS COR
12933 12490 IBM 1968-01-02 1999-01-03 3573 INTERNATIONAL BUSINESS MACHS COR
12934 12490 IBM 1999-01-04 2002-01-01 3571 INTERNATIONAL BUSINESS MACHS COR
12935 12490 IBM 2002-01-02 2004-06-09 3571 INTERNATIONAL BUSINESS MACHS COR
12936 12490 IBM 2004-06-10 2008-06-26 3571 INTERNATIONAL BUSINESS MACHS COR
12937 12490 IBM 2008-06-27 2014-01-21 3571 INTERNATIONAL BUSINESS MACHS COR
12938 12490 IBM 2014-01-22 2021-12-02 7379 INTERNATIONAL BUSINESS MACHS COR
12939 12490 IBM 2021-12-03 2023-09-28 3571 INTERNATIONAL BUSINESS MACHS COR
12940 12490 IBM 2023-09-29 2023-09-30 3571 INTERNATIONAL BUSINESS MACHS COR
12941 12490 IBM 2023-10-01 2024-12-31 3571 INTERNATIONAL BUSINESS MACHS COR

Note that when we look up data using permno we find an additional record.

comps[comps['permno']==12490]
permno ticker secinfostartdt secinfoenddt siccd issuernm
12931 12490 <NA> 1925-12-31 1962-07-01 3570 INTERNATIONAL BUSINESS MACHS COR
12932 12490 IBM 1962-07-02 1968-01-01 3573 INTERNATIONAL BUSINESS MACHS COR
12933 12490 IBM 1968-01-02 1999-01-03 3573 INTERNATIONAL BUSINESS MACHS COR
12934 12490 IBM 1999-01-04 2002-01-01 3571 INTERNATIONAL BUSINESS MACHS COR
12935 12490 IBM 2002-01-02 2004-06-09 3571 INTERNATIONAL BUSINESS MACHS COR
12936 12490 IBM 2004-06-10 2008-06-26 3571 INTERNATIONAL BUSINESS MACHS COR
12937 12490 IBM 2008-06-27 2014-01-21 3571 INTERNATIONAL BUSINESS MACHS COR
12938 12490 IBM 2014-01-22 2021-12-02 7379 INTERNATIONAL BUSINESS MACHS COR
12939 12490 IBM 2021-12-03 2023-09-28 3571 INTERNATIONAL BUSINESS MACHS COR
12940 12490 IBM 2023-09-29 2023-09-30 3571 INTERNATIONAL BUSINESS MACHS COR
12941 12490 IBM 2023-10-01 2024-12-31 3571 INTERNATIONAL BUSINESS MACHS COR

The CRSP data goes back to 1926.

comps[comps['secinfostartdt'].dt.year==1925]
permno ticker secinfostartdt secinfoenddt siccd issuernm
37 10006 <NA> 1925-12-31 1954-05-31 3740 AMERICAN CAR & FDRY CO
118 10022 <NA> 1925-12-31 1957-06-30 3420 AMERICAN SAFETY RAZOR CORP
164 10655 <NA> 1925-12-31 1927-02-23 2250 AUTO KNITTER HOSIERY INC
181 10030 <NA> 1925-12-31 1943-04-26 3310 AMERICAN BRAKE SHOE & FDRY
297 10049 <NA> 1925-12-31 1932-09-28 2640 ABITIBI POWER & PAPER LTD
... ... ... ... ... ... ...
64907 27561 <NA> 1925-12-31 1962-07-01 4000 BALTIMORE & OHIO RR CO
65057 27692 <NA> 1925-12-31 1961-01-30 4000 BANGOR & AROOSTOOK RR CO
65884 28513 <NA> 1925-12-31 1962-07-01 3350 MAGMA COPPER CO
89724 58368 <NA> 1925-12-31 1944-02-08 4000 ST LOUIS SOUTH WESTN RY CO
104822 75471 <NA> 1925-12-31 1952-01-01 1200 PACIFIC COAST CO NJ

509 rows × 6 columns

comps[comps['permno']==10006]
permno ticker secinfostartdt secinfoenddt siccd issuernm
37 10006 <NA> 1925-12-31 1954-05-31 3740 AMERICAN CAR & FDRY CO
38 10006 <NA> 1954-06-01 1962-07-01 3740 A C F INDUSTRIES INC
39 10006 ACF 1962-07-02 1968-01-01 3743 A C F INDUSTRIES INC
40 10006 ACF 1968-01-02 1984-06-28 3743 A C F INDUSTRIES INC
41 10006 <NA> 1984-06-29 1984-06-29 0 A C F INDUSTRIES INC (Last Known)

Daily stock data#

db.describe_table('crsp', 'dsf_v2')
Approximately 107661176 rows in crsp.dsf_v2.
name nullable type comment
0 permno True INTEGER PERMNO
1 hdrcusip True VARCHAR(8) Header CUSIP -8 Characters
2 permco True INTEGER PERMCO
3 siccd True INTEGER Sic Code
4 nasdissuno True INTEGER Nasdaq Issue Number
5 yyyymmdd True INTEGER YYYYMMDD - Daily Calendar Period Key
6 sharetype True VARCHAR(3) Share Type
7 securitytype True VARCHAR(4) Security Type
8 securitysubtype True VARCHAR(3) Security Sub-Type
9 usincflg True VARCHAR(1) US Incorporation Flag
10 issuertype True VARCHAR(4) Issuer Type
11 primaryexch True VARCHAR(1) Primary Exchange
12 conditionaltype True VARCHAR(3) Conditional Type
13 tradingstatusflg True VARCHAR(1) Trading Status Flag
14 dlycaldt True DATE Daily Calendar Date
15 dlydelflg True VARCHAR(1) Daily Delisting Flag
16 dlyprc True NUMERIC(13, 6) Daily Price
17 dlyprcflg True VARCHAR(2) Daily Price Flag
18 dlycap True NUMERIC(13, 2) Daily Capitalization
19 dlycapflg True VARCHAR(2) Daily Capitalization Flag
20 dlyprevprc True NUMERIC(13, 6) Daily Previous Price
21 dlyprevprcflg True VARCHAR(2) Daily Previous Price Flag
22 dlyprevdt True DATE Daily Previous Price Date
23 dlyprevcap True NUMERIC(13, 2) Daily Previous Capitalization
24 dlyprevcapflg True VARCHAR(2) Daily Previous Capitalization Flag
25 dlyret True NUMERIC(10, 6) Daily Total Return
26 dlyretx True NUMERIC(10, 6) Daily Price Return
27 dlyreti True NUMERIC(10, 6) Daily Income Return
28 dlyretmissflg True VARCHAR(2) Daily Return Missing Flag
29 dlyretdurflg True VARCHAR(2) Daily Return Duration Flag
30 dlyorddivamt True NUMERIC(13, 6) Daily Ordinary Dividend Amount
31 dlynonorddivamt True NUMERIC(13, 6) Daily Non-Ordinary Dividend Amount
32 dlyfacprc True NUMERIC(10, 6) Daily Factor To Adjust Price
33 dlydistretflg True VARCHAR(2) Daily Distribution Return Impact Flag
34 dlyvol True NUMERIC(14, 0) Daily Volume
35 dlyclose True NUMERIC(13, 6) Daily Close
36 dlylow True NUMERIC(13, 6) Daily Low
37 dlyhigh True NUMERIC(13, 6) Daily High
38 dlybid True NUMERIC(13, 6) Daily Bid
39 dlyask True NUMERIC(13, 6) Daily Ask
40 dlyopen True NUMERIC(13, 6) Daily Open
41 dlynumtrd True INTEGER Daily Number Of Trades
42 dlymmcnt True SMALLINT Daily Market Maker Count
43 dlyprcvol True NUMERIC(14, 1) Daily Price Volume
44 dlycumfacpr True NUMERIC(13, 6) Daily Cumulative Factor to Adjust Price
45 dlycumfacshr True NUMERIC(13, 6) Daily Cumulative Factor to Adjust Shares/Volume
46 cusip True VARCHAR(8) CUSIP
47 ticker True VARCHAR(5) Ticker
48 exchangetier True VARCHAR(3) Exchange Tier
49 shrout True INTEGER Shares Outstanding

We can also use SQL (Structured Query Language) to request data from WRDS through the API.

sql = """
    SELECT dlycaldt, dlyprc, dlylow, dlyhigh, dlyopen, dlyvol, dlycap, dlycumfacpr, shrout
    FROM crsp.dsf_v2
    WHERE permno = 14593
    AND dlycaldt >= '08/20/2020' AND dlycaldt <= '09/5/2020'
"""

aapl = db.raw_sql(sql, date_cols=['dlycaldt'])

appl = aapl.set_index('dlycaldt').sort_index(ascending=False)
aapl
dlycaldt dlyprc dlylow dlyhigh dlyopen dlyvol dlycap dlycumfacpr shrout
0 2020-08-20 473.1 462.9335 473.568 463.0 31665110.0 2022802445.4 4.0 4275634
1 2020-08-21 497.48 477.0 499.472 477.05 84280672.0 2127042402.32 4.0 4275634
2 2020-08-24 503.43 495.745 515.14 514.79 86159818.0 2152482424.62 4.0 4275634
3 2020-08-25 499.3 492.21 500.7172 498.79 52733324.0 2134824056.2 4.0 4275634
4 2020-08-26 506.09 500.33 507.97 504.717 40657799.0 2163855611.06 4.0 4275634
5 2020-08-27 500.04 495.33 509.94 508.57 38758679.0 2137988025.36 4.0 4275634
6 2020-08-28 499.23 498.31 505.77 504.05 46822040.0 2134524761.82 4.0 4275634
7 2020-08-31 129.04 126.0 131.0 127.58 225554025.0 2206911245.44 1.0 17102536
8 2020-09-01 134.18 130.53 134.8 132.76 152742024.0 2294818280.48 1.0 17102536
9 2020-09-02 131.4 127.0 137.98 137.59 200042069.0 2247273230.4 1.0 17102536
10 2020-09-03 120.88 120.5 128.84 126.91 257061551.0 2067354551.68 1.0 17102536
11 2020-09-04 120.96 110.89 123.7 120.07 331572938.0 2068722754.56 1.0 17102536
(aapl.set_index('dlycaldt').style
    .format(precision=2)
    .format_index(lambda s: s.strftime("%Y-%m-%d"))
    .format(formatter='{:,.0f}', subset=['dlyvol', 'dlycap', 'shrout'])
)
  dlyprc dlylow dlyhigh dlyopen dlyvol dlycap dlycumfacpr shrout
dlycaldt                
2020-08-20 473.10 462.93 473.57 463.00 31,665,110 2,022,802,445 4.00 4,275,634
2020-08-21 497.48 477.00 499.47 477.05 84,280,672 2,127,042,402 4.00 4,275,634
2020-08-24 503.43 495.75 515.14 514.79 86,159,818 2,152,482,425 4.00 4,275,634
2020-08-25 499.30 492.21 500.72 498.79 52,733,324 2,134,824,056 4.00 4,275,634
2020-08-26 506.09 500.33 507.97 504.72 40,657,799 2,163,855,611 4.00 4,275,634
2020-08-27 500.04 495.33 509.94 508.57 38,758,679 2,137,988,025 4.00 4,275,634
2020-08-28 499.23 498.31 505.77 504.05 46,822,040 2,134,524,762 4.00 4,275,634
2020-08-31 129.04 126.00 131.00 127.58 225,554,025 2,206,911,245 1.00 17,102,536
2020-09-01 134.18 130.53 134.80 132.76 152,742,024 2,294,818,280 1.00 17,102,536
2020-09-02 131.40 127.00 137.98 137.59 200,042,069 2,247,273,230 1.00 17,102,536
2020-09-03 120.88 120.50 128.84 126.91 257,061,551 2,067,354,552 1.00 17,102,536
2020-09-04 120.96 110.89 123.70 120.07 331,572,938 2,068,722,755 1.00 17,102,536

Exercise

Compare these data to information reported on Yahoo! Finance. Can you explain:

  • Do the prices (open, high, low, close) in each source agree? What about volume of shares traded?

  • The Yahoo data indicates a 4:1 stock split on August 31. Is there any evidence of this in the CRSP data?

  • Yahoo also reports a dividend on August 7. Is this indicated in the CRSP data somewhere?

  • In what units are dlycap and shrout reported?

Distributions#

Another table holds information on distributions to shareholders.

db.describe_table('crsp', 'stkdistributions')
Approximately 1033644 rows in crsp.stkdistributions.
name nullable type comment
0 permno True INTEGER PERMNO
1 disexdt True DATE Ex-Distribution Date
2 disseqnbr True SMALLINT Distribution Sequence Number
3 disordinaryflg True VARCHAR(1) Distribution Ordinary Dividend Flag
4 distype True VARCHAR(4) Distribution Type
5 disfreqtype True VARCHAR(3) Distribution Frequency Type
6 dispaymenttype True VARCHAR(4) Distribution Payment Method Type
7 disdetailtype True VARCHAR(8) Distribution Detail Type
8 distaxtype True VARCHAR(3) Distribution Tax Status Type
9 disorigcurtype True VARCHAR(3) Distribution Original Currency Type
10 disdivamt True NUMERIC(13, 6) Dividend Amount
11 disfacpr True NUMERIC(10, 6) Factor To Adjust Price
12 disfacshr True NUMERIC(10, 6) Factor To Adjust Shares
13 disdeclaredt True DATE Declaration Date
14 disrecorddt True DATE Record Date
15 dispaydt True DATE Payment Date
16 dispermno True INTEGER PERMNO of the Security Received
17 dispermco True INTEGER PERMCO of the Issuer Providing Payment
18 disamountsourcetype True VARCHAR(3) None
db.raw_sql("""
    SELECT *
    FROM crsp.stkdistributions
    WHERE permno = 14593
    AND disexdt >= '01/01/2020' AND disexdt <= '12/31/2021'
    """)
permno disexdt disseqnbr disordinaryflg distype disfreqtype dispaymenttype disdetailtype distaxtype disorigcurtype disdivamt disfacpr disfacshr disdeclaredt disrecorddt dispaydt dispermno dispermco disamountsourcetype
0 14593 2020-02-07 1 Y CD Q USD CDIV D USD 0.770 0.0 0.0 2020-01-28 2020-02-10 2020-02-13 0 0 N/A
1 14593 2020-05-08 1 Y CD Q USD CDIV D USD 0.820 0.0 0.0 2020-04-30 2020-05-11 2020-05-14 0 0 N/A
2 14593 2020-08-07 1 Y CD Q USD CDIV D USD 0.820 0.0 0.0 2020-07-30 2020-08-10 2020-08-13 0 0 N/A
3 14593 2020-08-31 1 N FRS N/A SS STKSPL N N/A NaN 3.0 3.0 2020-07-30 2020-08-24 2020-08-28 0 0 N/A
4 14593 2020-11-06 1 Y CD Q USD CDIV D USD 0.205 0.0 0.0 2020-10-29 2020-11-09 2020-11-12 0 0 N/A
5 14593 2021-02-05 1 Y CD Q USD CDIV D USD 0.205 0.0 0.0 2021-01-27 2021-02-08 2021-02-11 0 0 N/A
6 14593 2021-05-07 1 Y CD Q USD CDIV D USD 0.220 0.0 0.0 2021-04-28 2021-05-10 2021-05-13 0 0 N/A
7 14593 2021-08-06 1 Y CD Q USD CDIV D USD 0.220 0.0 0.0 2021-07-27 2021-08-09 2021-08-12 0 0 N/A
8 14593 2021-11-05 1 Y CD Q USD CDIV D USD 0.220 0.0 0.0 2021-10-28 2021-11-08 2021-11-11 0 0 N/A